Skip to content

Commit 545c7d4

Browse files
committed
Add an indicator to the hash set contexts that tells generators whether the hash code must be stored; when identity hashing is used, the hash code does not need to be stored
1 parent 938205b commit 545c7d4

File tree

8 files changed

+30
-17
lines changed

8 files changed

+30
-17
lines changed

Src/FastData.InternalShared/Helpers/TestHelper.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ public static GeneratorSpec Generate<T>(Func<string, ICodeGenerator> func, TestV
9999
if (vector.Type == typeof(EytzingerSearchStructure<>))
100100
return Generate(func, props, vector, new EytzingerSearchStructure<T>(props.DataType, StringComparison.Ordinal));
101101
if (vector.Type == typeof(HashSetChainStructure<>))
102-
return Generate(func, props, vector, new HashSetChainStructure<T>(HashData.Create(data, props.DataType, 1)));
102+
return Generate(func, props, vector, new HashSetChainStructure<T>(HashData.Create(data, props.DataType, 1), props.DataType));
103103
if (vector.Type == typeof(HashSetPerfectStructure<>))
104-
return Generate(func, props, vector, new HashSetPerfectStructure<T>(HashData.Create(data, props.DataType, 1)));
104+
return Generate(func, props, vector, new HashSetPerfectStructure<T>(HashData.Create(data, props.DataType, 1), props.DataType));
105105
if (vector.Type == typeof(HashSetLinearStructure<>))
106106
return Generate(func, props, vector, new HashSetLinearStructure<T>(HashData.Create(data, props.DataType, 1)));
107107
if (vector.Type == typeof(KeyLengthStructure<>))

Src/FastData.Testbed/Tests/GPerfTest.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ public static void ProduceOutputs(string path)
4343
StringProperties props = DataAnalyzer.GetStringProperties(data);
4444

4545
GPerfAnalyzer analyzer = new GPerfAnalyzer(data.Length, props, new GPerfAnalyzerConfig(), new Simulator(new SimulatorConfig()), factory.CreateLogger<GPerfAnalyzer>());
46+
4647
// Candidate hashFunc = analyzer.GetCandidates(data).First(); //TODO: use
4748

4849
HashData hashData = HashData.Create(data, DataType.String, 1);
49-
HashSetPerfectStructure<string> structure = new HashSetPerfectStructure<string>(hashData);
50+
HashSetPerfectStructure<string> structure = new HashSetPerfectStructure<string>(hashData, DataType.String);
5051
structure.Create(ref data);
5152
}
5253
}

Src/FastData/FastDataGenerator.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ public static string Generate<T>(ReadOnlySpan<T> data, FastDataConfig fdCfg, ICo
130130
HashData hashData = HashData.Create(data, props.DataType, fdCfg.HashCapacityFactor);
131131

132132
if (hashData.HashCodesPerfect)
133-
return Generate(generator, genCfg, new HashSetPerfectStructure<T>(hashData), data);
133+
return Generate(generator, genCfg, new HashSetPerfectStructure<T>(hashData, props.DataType), data);
134134

135-
return Generate(generator, genCfg, new HashSetChainStructure<T>(hashData), data);
135+
return Generate(generator, genCfg, new HashSetChainStructure<T>(hashData, props.DataType), data);
136136
}
137137
case StructureType.Array:
138138
return Generate(generator, genCfg, new ArrayStructure<T>(), data);
@@ -145,9 +145,9 @@ public static string Generate<T>(ReadOnlySpan<T> data, FastDataConfig fdCfg, ICo
145145
HashData hashData = HashData.Create(data, props.DataType, fdCfg.HashCapacityFactor);
146146

147147
if (hashData.HashCodesPerfect)
148-
return Generate(generator, genCfg, new HashSetPerfectStructure<T>(hashData), data);
148+
return Generate(generator, genCfg, new HashSetPerfectStructure<T>(hashData, props.DataType), data);
149149

150-
return Generate(generator, genCfg, new HashSetChainStructure<T>(hashData), data);
150+
return Generate(generator, genCfg, new HashSetChainStructure<T>(hashData, props.DataType), data);
151151
}
152152
default:
153153
throw new InvalidOperationException($"Unsupported DataStructure {fdCfg.StructureType}");

Src/FastData/Generators/Contexts/HashSetChainContext.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ namespace Genbox.FastData.Generators.Contexts;
77
/// <typeparam name="T">The type of elements in the data array.</typeparam>
88
/// <param name="buckets">The array of bucket indices.</param>
99
/// <param name="entries">The array of hash set entries.</param>
10-
public sealed class HashSetChainContext<T>(int[] buckets, HashSetEntry<T>[] entries) : IContext<T>
10+
/// <param name="storeHashCode">If set to true, the generated hash set should store the hash code alongside the value. If set to false, you should only generate a hash set that checks the value.</param>
11+
public sealed class HashSetChainContext<T>(int[] buckets, HashSetEntry<T>[] entries, bool storeHashCode) : IContext<T>
1112
{
1213
/// <summary>Gets the array of bucket indices.</summary>
1314
public int[] Buckets { get; } = buckets;
1415

1516
/// <summary>Gets the array of hash set entries.</summary>
1617
public HashSetEntry<T>[] Entries { get; } = entries;
18+
19+
/// <summary>Indicates whether the hash set should store the hash code or only the value.</summary>
20+
public bool StoreHashCode { get; } = storeHashCode;
1721
}

Src/FastData/Generators/Contexts/HashSetPerfectContext.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ namespace Genbox.FastData.Generators.Contexts;
55
/// <summary>Provides a context for perfect hash set-based data structures.</summary>
66
/// <typeparam name="T">The type of keys in the key-value pairs.</typeparam>
77
/// <param name="data">The array of key-value pairs and their hash codes.</param>
8-
public sealed class HashSetPerfectContext<T>(KeyValuePair<T, ulong>[] data) : IContext<T>
8+
/// <param name="storeHashCode">If set to true, the generated hash set should store the hash code alongside the value. If set to false, you should only generate a hash set that checks the value.</param>
9+
public sealed class HashSetPerfectContext<T>(KeyValuePair<T, ulong>[] data, bool storeHashCode) : IContext<T>
910
{
1011
/// <summary>Gets the array of items and their hash codes.</summary>
1112
public KeyValuePair<T, ulong>[] Data { get; } = data;
13+
14+
/// <summary>Indicates whether the hash set should store the hash code or only the value.</summary>
15+
public bool StoreHashCode { get; } = storeHashCode;
1216
}

Src/FastData/Internal/Structures/HashSetChainStructure.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
using Genbox.FastData.Enums;
12
using Genbox.FastData.Generators.Contexts;
23
using Genbox.FastData.Generators.Contexts.Misc;
4+
using Genbox.FastData.Generators.Extensions;
35
using Genbox.FastData.Internal.Abstracts;
46
using Genbox.FastData.Internal.Misc;
57

68
namespace Genbox.FastData.Internal.Structures;
79

8-
internal sealed class HashSetChainStructure<T>(HashData hashData) : IStructure<T, HashSetChainContext<T>>
10+
internal sealed class HashSetChainStructure<T>(HashData hashData, DataType dataType) : IStructure<T, HashSetChainContext<T>>
911
{
1012
public HashSetChainContext<T> Create(ref ReadOnlySpan<T> data)
1113
{
@@ -27,6 +29,6 @@ public HashSetChainContext<T> Create(ref ReadOnlySpan<T> data)
2729
bucket = i + 1;
2830
}
2931

30-
return new HashSetChainContext<T>(buckets, entries);
32+
return new HashSetChainContext<T>(buckets, entries, !dataType.IsIdentityHash());
3133
}
3234
}

Src/FastData/Internal/Structures/HashSetPerfectStructure.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
using Genbox.FastData.Enums;
12
using Genbox.FastData.Generators.Contexts;
3+
using Genbox.FastData.Generators.Extensions;
24
using Genbox.FastData.Internal.Abstracts;
35
using Genbox.FastData.Internal.Misc;
46

57
namespace Genbox.FastData.Internal.Structures;
68

7-
internal sealed class HashSetPerfectStructure<T>(HashData hashData) : IStructure<T, HashSetPerfectContext<T>>
9+
internal sealed class HashSetPerfectStructure<T>(HashData hashData, DataType dataType) : IStructure<T, HashSetPerfectContext<T>>
810
{
911
public HashSetPerfectContext<T> Create(ref ReadOnlySpan<T> data)
1012
{
@@ -20,6 +22,6 @@ public HashSetPerfectContext<T> Create(ref ReadOnlySpan<T> data)
2022
for (int i = 0; i < data.Length; i++)
2123
pairs[hashCodes[i] % size] = new KeyValuePair<T, ulong>(data[i], hashCodes[i]);
2224

23-
return new HashSetPerfectContext<T>(pairs);
25+
return new HashSetPerfectContext<T>(pairs, !dataType.IsIdentityHash());
2426
}
2527
}

Src/FastData/PublicAPI.Unshipped.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ Genbox.FastData.Generators.Contexts.EytzingerSearchContext<T>.EytzingerSearchCon
6060
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>
6161
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.Buckets.get -> int[]!
6262
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.Entries.get -> Genbox.FastData.Generators.Contexts.Misc.HashSetEntry<T>[]!
63-
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.HashSetChainContext() -> void
64-
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.HashSetChainContext(int[]! buckets, Genbox.FastData.Generators.Contexts.Misc.HashSetEntry<T>[]! entries) -> void
63+
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.HashSetChainContext(int[]! buckets, Genbox.FastData.Generators.Contexts.Misc.HashSetEntry<T>[]! entries, bool storeHashCode) -> void
64+
Genbox.FastData.Generators.Contexts.HashSetChainContext<T>.StoreHashCode.get -> bool
6565
Genbox.FastData.Generators.Contexts.HashSetLinearContext<T>
6666
Genbox.FastData.Generators.Contexts.HashSetLinearContext<T>.Buckets.get -> Genbox.FastData.Generators.Contexts.Misc.HashSetBucket[]!
6767
Genbox.FastData.Generators.Contexts.HashSetLinearContext<T>.Data.get -> T[]!
@@ -70,8 +70,8 @@ Genbox.FastData.Generators.Contexts.HashSetLinearContext<T>.HashSetLinearContext
7070
Genbox.FastData.Generators.Contexts.HashSetLinearContext<T>.HashSetLinearContext(T[]! data, Genbox.FastData.Generators.Contexts.Misc.HashSetBucket[]! buckets, ulong[]! hashCodes) -> void
7171
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>
7272
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>.Data.get -> System.Collections.Generic.KeyValuePair<T, ulong>[]!
73-
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>.HashSetPerfectContext() -> void
74-
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>.HashSetPerfectContext(System.Collections.Generic.KeyValuePair<T, ulong>[]! data) -> void
73+
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>.HashSetPerfectContext(System.Collections.Generic.KeyValuePair<T, ulong>[]! data, bool storeHashCode) -> void
74+
Genbox.FastData.Generators.Contexts.HashSetPerfectContext<T>.StoreHashCode.get -> bool
7575
Genbox.FastData.Generators.Contexts.KeyLengthContext<T>
7676
Genbox.FastData.Generators.Contexts.KeyLengthContext<T>.KeyLengthContext() -> void
7777
Genbox.FastData.Generators.Contexts.KeyLengthContext<T>.KeyLengthContext(System.Collections.Generic.List<string!>?[]! lengths, bool lengthsAreUniq, uint minLength, uint maxLength) -> void

0 commit comments

Comments
 (0)