Skip to content

Commit 627ecf1

Browse files
committed
Efficiently find sentinel candidates and fill the perfect hash table with them when there are empty slots
1 parent 18f18d1 commit 627ecf1

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

Src/FastData/Internal/Misc/HashData.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
namespace Genbox.FastData.Internal.Misc;
44

55
/// <summary>Used internally in FastData to store hash codes and their properties.</summary>
6-
internal record HashData(ulong[] HashCodes, int CapacityFactor, bool HashCodesUnique, bool HashCodesPerfect)
6+
internal record HashData(ulong[] HashCodes, int CapacityFactor, bool HashCodesUnique, bool HashCodesPerfect, ulong MinHashCode, ulong MaxHashCode)
77
{
88
internal static HashData Create<T>(T[] data, int capacityFactor, HashFunc<T> func)
99
{
@@ -21,19 +21,24 @@ internal static HashData Create<T>(T[] data, int capacityFactor, HashFunc<T> fun
2121

2222
bool uniq = true;
2323
bool perfect = true;
24+
ulong minHashCode = ulong.MaxValue;
25+
ulong maxHashCode = ulong.MinValue;
2426

2527
for (int i = 0; i < data.Length; i++)
2628
{
2729
ulong hash = func(data[i]);
2830
hashCodes[i] = hash;
2931

32+
minHashCode = hash < minHashCode ? hash : minHashCode;
33+
maxHashCode = hash > maxHashCode ? hash : maxHashCode;
34+
3035
if (uniq && !uniqSet.Add(hash)) //The unique check is first so that when it is false, we don't try the other conditions
3136
uniq = false;
3237

3338
if (perfect && !perfectSet.Add(hash % size))
3439
perfect = false;
3540
}
3641

37-
return new HashData(hashCodes, capacityFactor, uniq, perfect);
42+
return new HashData(hashCodes, capacityFactor, uniq, perfect, minHashCode, maxHashCode);
3843
}
3944
}

Src/FastData/Internal/Structures/HashTablePerfectStructure.cs

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,20 @@ public HashTablePerfectContext<TKey, TValue> Create(TKey[] keys, TValue[]? value
1414
throw new InvalidOperationException("HashSetPerfectStructure can only be created with a perfect hash function.");
1515

1616
ulong size = (ulong)(keys.Length * hashData.CapacityFactor);
17-
17+
bool hasEmptySlots = size != (ulong)keys.Length;
18+
bool storeHashCode = !keyType.IsIdentityHash() || hasEmptySlots;
1819
ulong[] hashCodes = hashData.HashCodes;
1920
KeyValuePair<TKey, ulong>[] pairs = new KeyValuePair<TKey, ulong>[size];
2021
TValue[]? denseValues = values == null ? null : new TValue[size];
2122

23+
if (storeHashCode && hasEmptySlots)
24+
{
25+
ulong sentinel = GetSentinel(hashData, hashCodes, keys.Length);
26+
27+
for (ulong i = 0; i < size; i++)
28+
pairs[i] = new KeyValuePair<TKey, ulong>(default!, sentinel);
29+
}
30+
2231
//We need to reorder the data to match hashes
2332
for (int i = 0; i < keys.Length; i++)
2433
{
@@ -29,6 +38,38 @@ public HashTablePerfectContext<TKey, TValue> Create(TKey[] keys, TValue[]? value
2938
denseValues[index] = values![i];
3039
}
3140

32-
return new HashTablePerfectContext<TKey, TValue>(pairs, !keyType.IsIdentityHash(), denseValues);
41+
return new HashTablePerfectContext<TKey, TValue>(pairs, storeHashCode, denseValues);
42+
}
43+
44+
/// <summary>Picks a hash value that none of the keys produce, so it can be stored in the empty slots of the table.</summary>
/// <param name="hashData">Hash statistics. Only <c>MinHashCode</c> and <c>MaxHashCode</c> are read.</param>
/// <param name="hashCodes">The hash codes of the keys.</param>
/// <param name="count">How many leading entries of <paramref name="hashCodes"/> to take into account.</param>
/// <returns>
/// A value that differs from the first <paramref name="count"/> entries of <paramref name="hashCodes"/>.
/// The two fast paths assume that MinHashCode/MaxHashCode really are the bounds of those hash codes.
/// </returns>
/// <exception cref="InvalidOperationException">The candidate wrapped around to 0 without finding an unused value.</exception>
private static ulong GetSentinel(HashData hashData, ulong[] hashCodes, int count)
{
    //Cheapest option: the value just above the largest hash code is unused
    if (hashData.MaxHashCode != ulong.MaxValue)
        return hashData.MaxHashCode + 1;

    //Second cheapest: the value just below the smallest hash code is unused
    if (hashData.MinHashCode != 0)
        return hashData.MinHashCode - 1;

    //Both 0 and ulong.MaxValue are taken. Index the hash codes once so each candidate is a single
    //set probe instead of a full rescan of the array.
    HashSet<ulong> used = new HashSet<ulong>();

    for (int i = 0; i < count; i++)
        used.Add(hashCodes[i]);

    //Start at 1 and stop if the candidate wraps around to 0
    for (ulong candidate = 1; candidate != 0; candidate++)
    {
        if (!used.Contains(candidate))
            return candidate;
    }

    throw new InvalidOperationException("Unable to find a sentinel hash value.");
}
3475
}

0 commit comments

Comments
 (0)