## Description

FastData is a code generator that analyzes your data and creates high-performance, read-only lookup data structures for static data. It can output the data structures in many different languages (C#, C++, Rust, etc.), ready for inclusion in your project with zero dependencies.
## Use case

Imagine a scenario where you have a predefined list of words (e.g., dog breeds) and need to check whether a specific dog breed exists in the set. Usually you would create an array and look up the value. However, this is far from optimal and misses a few optimizations.
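As a minimal sketch, the conventional approach might look like this (the breed list and class name are illustrative, not taken from the project):

```csharp
using System;

// The conventional approach: a plain array that is scanned on every lookup.
static class DogBreeds
{
    private static readonly string[] Breeds = { "Poodle", "Beagle", "Dalmatian" };

    // Linear scan over the array; no early exits, no data-aware layout.
    public static bool Contains(string value) =>
        Array.IndexOf(Breeds, value) >= 0;
}
```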
### Data structures

By default, FastData chooses the optimal data structure for your data, but you can also set it manually with `fastdata -s <type>`. See the details of each structure type below.
#### SingleValue

* Memory: Low
* Latency: Low
* Complexity: O(1)

This data structure only supports a single value. It is much faster than an array with a single item and has no overhead associated with it. FastData always selects this data structure whenever your dataset only contains one item.
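A hypothetical sketch of what generated code of this shape could look like (the value and class name are illustrative): a single comparison, with no array or hashing involved.

```csharp
// One-item dataset: membership collapses to a single equality check.
static class SingleValueExample
{
    public static bool Contains(string value) => value == "Poodle";
}
```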
#### Conditional

* Latency: Low
* Complexity: O(n)

This data structure relies on built-in logic in the programming language. It produces if/switch statements which ultimately become machine instructions on the CPU, rather than data that resides in memory. Latency is therefore incredibly low, but the higher number of instructions bloats the assembly, and at a certain point it becomes more efficient to have the data reside in memory.
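A hedged sketch of the idea (illustrative names, not actual FastData output): the dataset lives in branch instructions instead of a backing array.

```csharp
// The data is encoded directly as control flow: each member becomes a
// case label, so the compiler emits comparisons/jump tables, not an array.
static class ConditionalExample
{
    public static bool Contains(string value)
    {
        switch (value)
        {
            case "Poodle":
            case "Beagle":
            case "Dalmatian":
                return true;
            default:
                return false;
        }
    }
}
```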
#### Array

* Latency: Low
* Complexity: O(n)

This data structure uses an array as the backing store. It is often faster than a normal array due to efficient early exits (value/length range checks). It works well for small amounts of data since the array is scanned linearly, but for larger datasets, the O(n) complexity hurts performance a lot.
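A rough sketch of the pattern, under the assumption of a small string set (the data and length bounds are illustrative): a cheap range check rejects most non-members before the linear scan runs.

```csharp
// Array backing store plus an early exit: no member is shorter than 3 or
// longer than 9 characters, so other lengths are rejected in O(1).
static class ArrayExample
{
    private static readonly string[] Data = { "Pug", "Beagle", "Dalmatian" };

    public static bool Contains(string value)
    {
        if (value.Length < 3 || value.Length > 9)
            return false; // early exit: length outside the dataset's range

        foreach (string item in Data) // linear scan, fine for small n
            if (item == value)
                return true;
        return false;
    }
}
```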
#### BinarySearch

* Memory: Low
* Latency: Medium
* Complexity: O(log n)

This data structure sorts your data and does a binary search on it. Since data is sorted at compile time, there is no overhead at runtime. Each lookup has a higher latency than a simple array, but once the dataset gets to a few hundred items, it beats the array due to its lower complexity.
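A minimal sketch of the runtime side (the data and names are illustrative): the array ships pre-sorted, so only the search itself runs at lookup time.

```csharp
using System;

// The generator emits the data already sorted (ordinally here), so the
// runtime cost is just the O(log n) binary search.
static class BinarySearchExample
{
    private static readonly string[] Sorted = { "Beagle", "Dalmatian", "Poodle" };

    public static bool Contains(string value) =>
        Array.BinarySearch(Sorted, value, StringComparer.Ordinal) >= 0;
}
```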
#### EytzingerSearch

* Memory: Low
* Latency: Medium
* Complexity: O(log n)

This data structure sorts data using an Eytzinger layout, which has better cache locality than a plain binary search and, under some circumstances, better performance.
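As a hedged sketch (integer data for brevity, layout written out by hand): the sorted values are stored in breadth-first order, so the children of index `i` sit at `2i+1` and `2i+2`, keeping the hot top levels of the implicit tree packed together in memory.

```csharp
// Eytzinger (BFS) layout of the sorted values { 10, 20, 30, 40, 50, 60, 70 }:
// root 40, then 20/60, then 10/30/50/70 - an implicit balanced tree.
static class EytzingerExample
{
    private static readonly int[] Data = { 40, 20, 60, 10, 30, 50, 70 };

    public static bool Contains(int value)
    {
        int i = 0;
        while (i < Data.Length)
        {
            if (Data[i] == value) return true;
            // Go to the left child for smaller values, right child otherwise.
            i = 2 * i + (value < Data[i] ? 1 : 2);
        }
        return false;
    }
}
```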
#### KeyLength

* Memory: Low
* Latency: Low
* Complexity: O(1)

This data structure only works on strings, but it indexes them by their length rather than a hash. If all the strings have unique lengths, the data structure further optimizes for latency.
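A sketch of the unique-lengths case (illustrative data): one length check plus one string comparison decides membership, with no hashing at all.

```csharp
// Every member has a distinct length (3, 6, 9), so the length acts as a
// perfect index and a single comparison finishes the lookup.
static class KeyLengthExample
{
    public static bool Contains(string value)
    {
        switch (value.Length)
        {
            case 3: return value == "Pug";
            case 6: return value == "Beagle";
            case 9: return value == "Dalmatian";
            default: return false;
        }
    }
}
```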
#### HashSetChain

* Latency: Medium
* Complexity: O(1)

This data structure is based on a hash table with separate chaining collision resolution. It uses a separate array for buckets to stay cache coherent, but it also uses more memory since it needs to keep track of indices.
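A hedged sketch of the layout (sizing, hash, and names are illustrative, not actual FastData output): `Buckets` holds the index of the first entry per chain, and `Next` links entries that collide, which is the extra index bookkeeping that costs memory.

```csharp
using System;

// Separate-chaining hash set with the buckets kept in their own array.
static class HashSetChainExample
{
    private static readonly string[] Items = { "Pug", "Beagle", "Dalmatian" };
    private static readonly int[] Buckets = new int[Items.Length]; // first index per bucket, -1 = empty
    private static readonly int[] Next = new int[Items.Length];    // next index in chain, -1 = end

    static HashSetChainExample()
    {
        Array.Fill(Buckets, -1);
        for (int i = 0; i < Items.Length; i++)
        {
            int bucket = Hash(Items[i]);
            Next[i] = Buckets[bucket]; // prepend to the bucket's chain
            Buckets[bucket] = i;
        }
    }

    private static int Hash(string s) =>
        (int)((uint)s.GetHashCode() % (uint)Items.Length);

    public static bool Contains(string value)
    {
        for (int i = Buckets[Hash(value)]; i != -1; i = Next[i])
            if (Items[i] == value)
                return true;
        return false;
    }
}
```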
#### HashSetLinear

This data structure is also a hash table, but with linear collision resolution.

[…]

* Latency: Low
* Complexity: O(1)

This data structure tries to create a perfect hash for the dataset. It does so by brute-forcing a seed for a simple hash function until it hits the right combination. If the dataset is small enough, it can even produce a minimal perfect hash.
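The brute-force search can be sketched as follows (the hash function, seed range, and table size are illustrative assumptions): seeds are tried until every key lands in a distinct slot, and only the winning seed needs to ship in the generated code.

```csharp
using System;

// Brute-force a seed for a simple multiplicative hash until it is a
// perfect hash (no two keys share a slot) for the given table size.
static class PerfectHashSearch
{
    public static uint Hash(string s, uint seed)
    {
        uint h = seed;
        foreach (char c in s)
            h = unchecked(h * 31 + c);
        return h;
    }

    public static uint FindSeed(string[] keys, uint tableSize)
    {
        for (uint seed = 1; seed < 100_000; seed++)
        {
            var used = new bool[tableSize];
            bool perfect = true;
            foreach (string key in keys)
            {
                uint slot = Hash(key, seed) % tableSize;
                if (used[slot]) { perfect = false; break; }
                used[slot] = true;
            }
            if (perfect) return seed; // tableSize == keys.Length gives a minimal perfect hash
        }
        throw new InvalidOperationException("No seed found in range");
    }
}
```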
#### PerfectHashGPerf

* Latency: Low
* Complexity: O(1)

This data structure uses the same algorithm as gperf to derive a perfect hash. It uses Richard J. Cichelli's method for creating an associative table, which is augmented using alpha increments to resolve collisions. It only works on strings, but it is great for medium-sized datasets.
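As a hedged illustration of the Cichelli scheme that gperf builds on: the hash is the string's length plus associative values for its first and last characters, where the associative table is searched for at generation time. The table values below are hand-picked for this toy set, not real gperf output.

```csharp
// Cichelli-style hash: length + assoc(first char) + assoc(last char).
// With these (illustrative) associative values, the three keys map to
// the distinct values 3, 7 and 11 - a perfect hash.
static class CichelliExample
{
    private static int Assoc(char c) => c switch
    {
        'P' => 0, 'g' => 0,
        'B' => 1, 'e' => 0,
        'D' => 2, 'n' => 0,
        _ => -1, // character never starts/ends a key
    };

    public static int Hash(string s) =>
        s.Length + Assoc(s[0]) + Assoc(s[s.Length - 1]);
}
```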
## How does it work?

The idea behind the project is to generate a data-dependent optimized data structure for read-only lookup. When data is known beforehand, the algorithm can select from a set of different data structures, indexing, and comparison methods that are tailor-built for the data.
### Compile-time generation

FastData uses advanced data analysis techniques to generate optimized data structures:

[…]

* Character mapping
* Encoding analysis

It uses the analysis to create so-called early-exits, which are fast `O(1)` checks on your input before doing any `O(n)` checks on the actual dataset.
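One possible shape of such an early-exit, sketched under the assumption of a string set whose members have lengths 3, 6, and 9 (the mask and names are illustrative): a bitmask of occurring lengths filters input in `O(1)` before any dataset scan.

```csharp
// Early-exit filter: bit n of the mask is set iff some member has length n.
// A failed check proves non-membership without touching the dataset.
static class EarlyExitExample
{
    private const ulong LengthMask = (1UL << 3) | (1UL << 6) | (1UL << 9);

    public static bool MightContain(string value) =>
        value.Length < 64 && (LengthMask & (1UL << value.Length)) != 0;
}
```

A `true` result only means the full lookup must still run; `false` is definitive.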
#### Hash function generators

Hash functions come in many flavors. Some are designed for low latency, some for throughput, others for low collision rate. Programming language runtimes come with a hash function that is a tradeoff between these parameters. FastData builds a hash function specifically tailored to the dataset. It has support for several techniques:

1. **Default:** If no technique is selected, FastData uses a hash function by Daniel Bernstein (DJB2)
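For reference, the classic DJB2 function is tiny: start from 5381 and fold each character in with `hash * 33 + c`. A straightforward C# rendering (not FastData's generated code):

```csharp
// DJB2 by Daniel Bernstein: hash = hash * 33 + c, seeded with 5381.
static class Djb2
{
    public static uint Hash(string s)
    {
        uint hash = 5381;
        foreach (char c in s)
            hash = unchecked(hash * 33 + c);
        return hash;
    }
}
```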