Skip to content

Commit 1e88a32

Browse files
committed
Fix GPerf hashing
Fix a bug where hashing would increment idx twice. Make a version of the hash that does not allocate a new string.
1 parent f46822f commit 1e88a32

File tree

11 files changed

+74
-72
lines changed

11 files changed

+74
-72
lines changed

Src/FastData.Generator.CPlusPlus.Tests/Generated/PerfectHashGPerf_String_100.output

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// This file is auto-generated. Do not edit manually.
2+
// Structure: PerfectHashGPerf
13
#include <string>
24
#include <array>
35
#include <cstdint>
@@ -95,4 +97,6 @@ public:
9597

9698

9799
static constexpr int item_count = 100;
100+
static constexpr int min_length = 1;
101+
static constexpr int max_length = 2;
98102
};

Src/FastData.Generator.CPlusPlus.Tests/Generated/PerfectHashGPerf_String_3.output

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// This file is auto-generated. Do not edit manually.
2+
// Structure: PerfectHashGPerf
13
#include <string>
24
#include <array>
35
#include <cstdint>
@@ -58,4 +60,6 @@ public:
5860

5961

6062
static constexpr int item_count = 3;
63+
static constexpr int min_length = 5;
64+
static constexpr int max_length = 5;
6165
};

Src/FastData.Generator.CPlusPlus.Tests/Generated/PerfectHashGPerf_String_7.output

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// This file is auto-generated. Do not edit manually.
2+
// Structure: PerfectHashGPerf
13
#include <string>
24
#include <array>
35
#include <cstdint>
@@ -81,4 +83,6 @@ public:
8183

8284

8385
static constexpr int item_count = 7;
86+
static constexpr int min_length = 3;
87+
static constexpr int max_length = 10;
8488
};

Src/FastData.Generator.CSharp.Tests/Generated/PerfectHashGPerf_String_100.output

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
// <auto-generated />
22
// Structure: PerfectHashGPerf
33
#nullable enable
4-
using Genbox.FastData.Abstracts;
5-
using Genbox.FastData.Generator.CSharp.Abstracts;
6-
using Genbox.FastData.Helpers;
7-
using Genbox.FastData;
4+
using System;
85
using System.Runtime.CompilerServices;
96
using System.Runtime.InteropServices;
10-
using System.Text;
11-
using System;
7+
using Genbox.FastData.Helpers;
8+
using Genbox.FastData.Generator.CSharp.Abstracts;
129

1310

1411
internal partial class PerfectHashGPerf_String_100 : IFastSet<String>

Src/FastData.Generator.CSharp.Tests/Generated/PerfectHashGPerf_String_3.output

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
// <auto-generated />
22
// Structure: PerfectHashGPerf
33
#nullable enable
4-
using Genbox.FastData.Abstracts;
5-
using Genbox.FastData.Generator.CSharp.Abstracts;
6-
using Genbox.FastData.Helpers;
7-
using Genbox.FastData;
4+
using System;
85
using System.Runtime.CompilerServices;
96
using System.Runtime.InteropServices;
10-
using System.Text;
11-
using System;
7+
using Genbox.FastData.Helpers;
8+
using Genbox.FastData.Generator.CSharp.Abstracts;
129

1310

1411
internal partial class PerfectHashGPerf_String_3 : IFastSet<String>

Src/FastData.Generator.CSharp.Tests/Generated/PerfectHashGPerf_String_7.output

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
// <auto-generated />
22
// Structure: PerfectHashGPerf
33
#nullable enable
4-
using Genbox.FastData.Abstracts;
5-
using Genbox.FastData.Generator.CSharp.Abstracts;
6-
using Genbox.FastData.Helpers;
7-
using Genbox.FastData;
4+
using System;
85
using System.Runtime.CompilerServices;
96
using System.Runtime.InteropServices;
10-
using System.Text;
11-
using System;
7+
using Genbox.FastData.Helpers;
8+
using Genbox.FastData.Generator.CSharp.Abstracts;
129

1310

1411
internal partial class PerfectHashGPerf_String_7 : IFastSet<String>

Src/FastData.Tests/HashSpecTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ public static IEnumerable<object[]> GetSpecs()
2727
yield return [new GeneticHashSpec(2, 1, 2, 1, [new StringSegment(0, -1, Alignment.Left)]), 401880771];
2828

2929
yield return [new HeuristicHashSpec([1]), 101];
30-
yield return [new HeuristicHashSpec([0, 1]), 104];
30+
yield return [new HeuristicHashSpec([0, 1]), 1765];
3131
}
3232
}

Src/FastData/HashFunctions/PJWHash.cs

Lines changed: 0 additions & 46 deletions
This file was deleted.

Src/FastData/Internal/Analysis/Analyzers/Heuristics/HeuristicAnalyzer.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ the hash is a perfect hash.
4141
*/
4242

4343
/// <summary>Finds the least number of positions in a string that hashes to a unique value for all inputs.</summary>
44+
[SuppressMessage("Performance", "MA0159:Use \'Order\' instead of \'OrderBy\'")]
4445
internal class HeuristicAnalyzer(object[] data, StringProperties props, HeuristicAnalyzerConfig config, Simulator simulator) : IHashAnalyzer<HeuristicHashSpec>
4546
{
4647
public Candidate<HeuristicHashSpec> Run()
@@ -232,7 +233,6 @@ private void MergePositions(int max, HashSet<int> mandatory, ref HashSet<int> cu
232233

233234
private double CalculateFitness(HashSet<int> set) => CalculateFitnessInternal(set).Fitness * -1; //The algorithm here works with less fitness = better. At least for now.
234235

235-
[SuppressMessage("Performance", "MA0159:Use \'Order\' instead of \'OrderBy\'")]
236236
private Candidate<HeuristicHashSpec> CalculateFitnessInternal(HashSet<int> set)
237237
{
238238
Candidate<HeuristicHashSpec> cand = new Candidate<HeuristicHashSpec>(new HeuristicHashSpec(set.OrderBy(x => x).ToArray()));
@@ -241,7 +241,6 @@ private Candidate<HeuristicHashSpec> CalculateFitnessInternal(HashSet<int> set)
241241
}
242242

243243
[Conditional("DebugPrint")]
244-
[SuppressMessage("Performance", "MA0159:Use \'Order\' instead of \'OrderBy\'")]
245244
private void Print(string stage, HashSet<int> set)
246245
{
247246
Candidate<HeuristicHashSpec> cand = CalculateFitnessInternal(set);

Src/FastData/Internal/Analysis/Analyzers/Heuristics/HeuristicHashSpec.cs

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,57 @@ internal readonly record struct HeuristicHashSpec(int[] Positions) : IHashSpec
1010
public HashFunc GetHashFunction()
1111
{
1212
int[] localPos = Positions;
13-
return x => PJWHash.Hash(x, localPos);
13+
return x => Hash(x, localPos);
1414
}
1515

1616
public EqualFunc GetEqualFunction()
1717
{
1818
int[] localPos = Positions;
19-
return (a, b) => PJWHash.GetString(a, localPos) == PJWHash.GetString(b, localPos);
19+
return (a, b) => Equal(a, b, localPos);
20+
}
21+
22+
/// <summary>
/// Compares two strings only at the given character positions, without allocating an intermediate string.
/// </summary>
/// <param name="a">First string to compare.</param>
/// <param name="b">Second string to compare.</param>
/// <param name="positions">Zero-based character indexes to compare; -1 selects the last character of each string.</param>
/// <returns><c>false</c> as soon as a compared position differs; otherwise <c>true</c>.</returns>
/// <remarks>
/// A position that lies past the end of either string is skipped, so it never causes a mismatch.
/// NOTE(review): the -1 case indexes <c>Length - 1</c>, which throws for an empty string - confirm callers never pass empty input.
/// </remarks>
private static bool Equal(string a, string b, int[] positions)
23+
{
24+
foreach (int pos in positions)
25+
{
26+
if (pos == -1) // -1 means "last character". It must be tested first: -1 also satisfies the bounds test in the next branch and would then be used as an index.
27+
{
28+
if (a[a.Length - 1] != b[b.Length - 1])
29+
return false;
30+
}
31+
// Only compare when the position exists in both strings; otherwise the position is ignored.
else if (pos <= a.Length - 1 && pos <= b.Length - 1)
32+
{
33+
if (a[pos] != b[pos])
34+
return false;
35+
}
36+
}
37+
38+
return true;
39+
}
40+
41+
/// <summary>
/// Hashes only the characters of <paramref name="input"/> found at the given positions, reading them in place.
/// </summary>
/// <param name="input">The string to hash.</param>
/// <param name="positions">Zero-based character indexes to include; -1 selects the last character.</param>
/// <returns>The accumulated hash; 0 when no position contributed a character.</returns>
/// <remarks>
/// Positions past the end of <paramref name="input"/> are skipped and do not affect the result.
/// NOTE(review): the -1 case indexes <c>Length - 1</c>, which throws for an empty string, and negative
/// positions other than -1 pass the bounds test and would index out of range - confirm neither can occur.
/// </remarks>
private static uint Hash(string input, int[] positions)
42+
{
43+
// This is the PJW hash, applied only to the selected characters.
44+
45+
uint h = 0;
46+
foreach (int pos in positions)
47+
{
48+
char c;
49+
50+
// Resolve the character for this position: -1 is the last character, out-of-range positions are skipped.
if (pos == -1)
51+
c = input[input.Length - 1];
52+
else if (pos <= input.Length - 1)
53+
c = input[pos];
54+
else
55+
continue;
56+
57+
// Shift the running hash left by four bits and add the character.
h = (h << 4) + c;
58+
59+
// Isolate the top four bits of the hash.
uint high = h & 0xf0000000;
60+
61+
// If any top bit is set, fold those bits back into the lower part (high >> 24) and clear them (^ high).
if (high != 0)
62+
h = h ^ (high >> 24) ^ high;
63+
}
64+
return h;
2065
}
2166
}

0 commit comments

Comments
 (0)