Skip to content

Commit 2be9d0e

Browse files
committed
Use unsigned lengths and refactor to read Min/Max from LengthMap
1 parent c0cbbcc commit 2be9d0e

File tree

16 files changed, with 47 additions and 77 deletions.

Src/FastData.Tests/KeyAnalyzerTests.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -57,11 +57,11 @@ public void GetStringProperties_LengthMap_Test(string[] data)
5757

5858
foreach (string str in data)
5959
{
60-
Assert.True(map.Get(str.Length));
60+
Assert.True(map.Get((uint)str.Length));
6161
}
6262

63-
Assert.Equal((uint)data.Min(x => x.Length), res.LengthData.Min);
64-
Assert.Equal((uint)data.Max(x => x.Length), res.LengthData.Max);
63+
Assert.Equal((uint)data.Min(x => x.Length), res.LengthData.LengthMap.Min);
64+
Assert.Equal((uint)data.Max(x => x.Length), res.LengthData.LengthMap.Max);
6565
}
6666

6767
[Theory]

Src/FastData.Tests/LengthBitArrayTests.cs

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -22,28 +22,10 @@ public void DefaultBitsAreFalse()
2222
public void CustomLength_AllBitsFalseWithinRange()
2323
{
2424
LengthBitArray bits = new LengthBitArray(128);
25-
for (int i = 0; i < 128; i += 15)
25+
for (uint i = 0; i < 128; i += 15)
2626
Assert.False(bits.Get(i));
2727
}
2828

29-
[Theory]
30-
[InlineData(-1)]
31-
[InlineData(-100)]
32-
public void Get_OutOfRange_Throws(int index)
33-
{
34-
LengthBitArray bits = new LengthBitArray();
35-
Assert.Throws<ArgumentException>(() => bits.Get(index));
36-
}
37-
38-
[Theory]
39-
[InlineData(-1)]
40-
[InlineData(-50)]
41-
public void SetTrue_NegativeIndex_Throws(int index)
42-
{
43-
LengthBitArray bits = new LengthBitArray();
44-
Assert.Throws<ArgumentException>(() => bits.SetTrue(index));
45-
}
46-
4729
[Fact]
4830
public void SetTrue_SetsBitAndReturnsCorrectFlag()
4931
{

Src/FastData/FastDataGenerator.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ private static string GenerateStringInternal<TValue>(ReadOnlyMemory<string> keys
120120
keySpan = keyMemory.Span;
121121
}
122122

123-
LogMinMaxLength(logger, strProps.LengthData.Min, strProps.LengthData.Max);
123+
LogMinMaxLength(logger, strProps.LengthData.LengthMap.Min, strProps.LengthData.LengthMap.Max);
124124

125125
HashDetails hashDetails = new HashDetails();
126126
GeneratorConfig<string> genCfg = new GeneratorConfig<string>(fdCfg.StructureType, keyType, (uint)keySpan.Length, strProps, DefaultStringComparison, hashDetails, generator.Encoding, strProps.CharacterData.AllAscii ? GeneratorFlags.AllAreASCII : GeneratorFlags.None, trimPrefix, trimSuffix);
@@ -133,7 +133,7 @@ private static string GenerateStringInternal<TValue>(ReadOnlyMemory<string> keys
133133
return GenerateWrapper(generator, genCfg, new SingleValueStructure<string, TValue>(), keyMemory, values);
134134

135135
// For small amounts of data, logic is the fastest. However, it increases the assembly size, so we want to try some special cases first.
136-
double density = (double)keySpan.Length / (strProps.LengthData.Max - strProps.LengthData.Min + 1);
136+
double density = (double)keySpan.Length / (strProps.LengthData.LengthMap.Max - strProps.LengthData.LengthMap.Min + 1);
137137

138138
// Use KeyLengthStructure only when string lengths are unique and density >= 75%
139139
if (strProps.LengthData.Unique && density >= 0.75)
@@ -346,7 +346,7 @@ internal static Candidate GetBestHash(ReadOnlySpan<string> data, StringPropertie
346346
perfect.Sort(static (a, b) => b.Fitness.CompareTo(a.Fitness));
347347
notPerfect.Sort(static (a, b) => b.Fitness.CompareTo(a.Fitness));
348348

349-
string test = new string('a', (int)props.LengthData.Max);
349+
string test = new string('a', (int)props.LengthData.LengthMap.Max);
350350
byte[] testBytes = encoding == GeneratorEncoding.UTF8 ? Encoding.UTF8.GetBytes(test) : Encoding.Unicode.GetBytes(test);
351351

352352
//We start with the perfect results (if any)

Src/FastData/Generators/GeneratorConfig.cs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,8 @@ private static Constants<T> CreateConstants(KeyProperties<T> props, uint itemCou
7979
private static Constants<T> CreateConstants(StringProperties props, uint itemCount)
8080
{
8181
Constants<T> constants = new Constants<T>(itemCount);
82-
constants.MinStringLength = props.LengthData.Min;
83-
constants.MaxStringLength = props.LengthData.Max;
82+
constants.MinStringLength = props.LengthData.LengthMap.Min;
83+
constants.MaxStringLength = props.LengthData.LengthMap.Max;
8484
return constants;
8585
}
8686

@@ -108,7 +108,7 @@ private static IEnumerable<IEarlyExit> GetEarlyExits(StringProperties props, uin
108108
uint minByteCount = enc == GeneratorEncoding.UTF8 ? lengthData.MinUtf8ByteCount : lengthData.MinUtf16ByteCount;
109109
uint maxByteCount = enc == GeneratorEncoding.UTF8 ? lengthData.MaxUtf8ByteCount : lengthData.MaxUtf16ByteCount;
110110

111-
yield return new MinMaxLengthEarlyExit(lengthData.Min, lengthData.Max, minByteCount, maxByteCount); //Also handles same lengths
111+
yield return new MinMaxLengthEarlyExit(lengthData.LengthMap.Min, lengthData.LengthMap.Max, minByteCount, maxByteCount); //Also handles same lengths
112112
}
113113
}
114114

@@ -136,7 +136,7 @@ private static bool ShouldUseBitSet(LengthData lengthData)
136136
if (lengthData.LengthMap.Values.Length > MaxLengthBitSetWords)
137137
return false;
138138

139-
uint range = lengthData.Max - lengthData.Min + 1;
139+
uint range = lengthData.LengthMap.Max - lengthData.LengthMap.Min + 1;
140140
double density = lengthData.LengthMap.BitCount / (double)range;
141141
return density <= MaxLengthBitSetDensity;
142142
}

Src/FastData/Internal/Analysis/Analyzers/GPerfAnalyzer.cs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ internal sealed partial class GPerfAnalyzer(int dataLength, StringProperties pro
3636
public IEnumerable<Candidate> GetCandidates(ReadOnlySpan<string> data)
3737
{
3838
// We cannot work on empty strings
39-
if (props.LengthData.Min == 0)
39+
if (props.LengthData.LengthMap.Min == 0)
4040
return [];
4141

4242
// Step1: Find positions
@@ -47,7 +47,7 @@ public IEnumerable<Candidate> GetCandidates(ReadOnlySpan<string> data)
4747
if (positions.Length == 0)
4848
return [];
4949

50-
int maxLen = (int)props.LengthData.Max;
50+
int maxLen = (int)props.LengthData.LengthMap.Max;
5151

5252
// TODO: For now, we keep regenerating state within Keyword. In the future, I hope to do this more efficiently
5353
List<Keyword> keywords = new List<Keyword>(data.Length);
@@ -136,7 +136,7 @@ public IEnumerable<Candidate> GetCandidates(ReadOnlySpan<string> data)
136136
#endif
137137

138138
// We convert keywords to KeyValuePair to keep Keyword internal
139-
GPerfStringHash stringHash = new GPerfStringHash(table.Values, alphaInc, positions.OrderByDescending(x => x).ToArray(), props.LengthData.Min);
139+
GPerfStringHash stringHash = new GPerfStringHash(table.Values, alphaInc, positions.OrderByDescending(x => x).ToArray(), props.LengthData.LengthMap.Min);
140140

141141
Candidate candidate = sim.Run(data, stringHash);
142142
LogCandidate(logger, candidate.Fitness, candidate.Collisions);
@@ -180,7 +180,7 @@ the hash is a perfect hash.
180180
- Case-sensitivity (alpha_unify)
181181
*/
182182

183-
int max = (int)Math.Min(props.LengthData.Max - 1, config.MaxPositions - 1);
183+
int max = (int)Math.Min(props.LengthData.LengthMap.Max - 1, config.MaxPositions - 1);
184184

185185
// Stage 1: Find all positions that are mandatory. If two items are the same length, but differ only on one character, then we must include that character.
186186
DirectMap mandatory = new DirectMap(max + 1);

Src/FastData/Internal/Analysis/Data/LengthBitArray.cs

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ internal sealed class LengthBitArray(int length = 64)
77
{
88
private readonly SwitchingBitSet _tracker = new SwitchingBitSet(length, true);
99

10-
public int Min { get; private set; } = int.MaxValue;
11-
public int Max { get; private set; } = int.MinValue;
10+
public uint Min { get; private set; } = uint.MaxValue;
11+
public uint Max { get; private set; } = uint.MinValue;
1212

1313
internal ulong[] Values => _tracker.BitSet;
1414
internal int BitCount { get; private set; }
@@ -26,16 +26,11 @@ internal bool Consecutive
2626
}
2727

2828
[MethodImpl(MethodImplOptions.AggressiveInlining)]
29-
internal bool Get(int index) => _tracker.Contains(index);
29+
internal bool Get(uint index) => _tracker.Contains(index);
3030

3131
[MethodImpl(MethodImplOptions.AggressiveInlining)]
32-
internal bool SetTrue(int index)
32+
internal bool SetTrue(uint index)
3333
{
34-
if (index < 0)
35-
throw new ArgumentException("Index must be non-negative: " + index, nameof(index));
36-
37-
unchecked
38-
{
3934
bool added = _tracker.Add(index);
4035

4136
if (added)
@@ -46,6 +41,5 @@ internal bool SetTrue(int index)
4641
}
4742

4843
return !added;
49-
}
5044
}
5145
}

Src/FastData/Internal/Analysis/Data/LengthData.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@
33
namespace Genbox.FastData.Internal.Analysis.Data;
44

55
[StructLayout(LayoutKind.Auto)]
6-
internal record struct LengthData(uint Min, uint Max, uint MinUtf8ByteCount, uint MaxUtf8ByteCount, uint MinUtf16ByteCount, uint MaxUtf16ByteCount, bool Unique, LengthBitArray LengthMap);
6+
internal record struct LengthData(uint MinUtf8ByteCount, uint MaxUtf8ByteCount, uint MinUtf16ByteCount, uint MaxUtf16ByteCount, bool Unique, LengthBitArray LengthMap);

Src/FastData/Internal/Analysis/KeyAnalyzer.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ internal static StringProperties GetStringProperties(ReadOnlySpan<string> keys,
6161
minUtf16ByteLength = Math.Min(utf16ByteCount, minUtf16ByteLength);
6262
maxUtf16ByteLength = Math.Max(utf16ByteCount, maxUtf16ByteLength);
6363

64-
uniq &= !lengthMap.SetTrue(str.Length);
64+
uniq &= !lengthMap.SetTrue((uint)str.Length);
6565

6666
foreach (char c in str)
6767
{
@@ -145,7 +145,7 @@ internal static StringProperties GetStringProperties(ReadOnlySpan<string> keys,
145145
// }
146146
}
147147

148-
return new StringProperties(new LengthData((uint)lengthMap.Min, (uint)maxStr.Length, (uint)minUtf8ByteLength, (uint)maxUtf8ByteLength, (uint)minUtf16ByteLength, (uint)maxUtf16ByteLength, uniq, lengthMap), new DeltaData(left, right), new CharacterData(allAscii));
148+
return new StringProperties(new LengthData((uint)minUtf8ByteLength, (uint)maxUtf8ByteLength, (uint)minUtf16ByteLength, (uint)maxUtf16ByteLength, uniq, lengthMap), new DeltaData(left, right), new CharacterData(allAscii));
149149
}
150150

151151
private static KeyProperties<char> GetCharProperties(ReadOnlySpan<char> keys)

Src/FastData/Internal/Analysis/SegmentGenerators/BruteForceGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ internal sealed class BruteForceGenerator(int maxLength) : ISegmentGenerator
1212

1313
public IEnumerable<ArraySegment> Generate(StringProperties props)
1414
{
15-
int max = (int)Math.Min(props.LengthData.Min, maxLength); //We cannot segment above the shortest string.
15+
int max = (int)Math.Min(props.LengthData.LengthMap.Min, maxLength); //We cannot segment above the shortest string.
1616

1717
//Generates:
1818
//[t]est

Src/FastData/Internal/Analysis/SegmentGenerators/DeltaGenerator.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ public IEnumerable<ArraySegment> Generate(StringProperties props)
7373
foreach (ArraySegment segment in CalculateSegments(props.DeltaData.Left))
7474
{
7575
// Left Alignment: offset + length <= Min
76-
int maxLength = (int)(props.LengthData.Min - segment.Offset);
76+
int maxLength = (int)(props.LengthData.LengthMap.Min - segment.Offset);
7777
int length = maxLength < 0 ? 0 : Math.Min(segment.Length, maxLength);
7878

7979
if (length > 0)
@@ -87,7 +87,7 @@ public IEnumerable<ArraySegment> Generate(StringProperties props)
8787
foreach (ArraySegment segment in CalculateSegments(props.DeltaData.Right))
8888
{
8989
// Right Alignment: offset + length <= Min
90-
int maxLength = (int)(props.LengthData.Min - segment.Offset);
90+
int maxLength = (int)(props.LengthData.LengthMap.Min - segment.Offset);
9191
int length = maxLength < 0 ? 0 : Math.Min(segment.Length, maxLength);
9292

9393
if (length > 0)

0 commit comments

Comments
 (0)