Skip to content

Commit 937c9cc

Browse files
committed
Make encoding more explicit with an enum
1 parent c69c253 commit 937c9cc

File tree

17 files changed

+64
-46
lines changed

17 files changed

+64
-46
lines changed

Src/FastData.Benchmarks/Benchmarks/AnalyzerBenchmarks.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using Genbox.FastData.Enums;
12
using Genbox.FastData.Internal.Analysis;
23
using Genbox.FastData.Internal.Analysis.Analyzers;
34
using Genbox.FastData.Internal.Analysis.Properties;
@@ -18,7 +19,7 @@ public AnalyzerBenchmarks()
1819
_data = Enumerable.Range(1, 100).Select(_ => TestHelper.GenerateRandomString(rng, 50)).ToArray();
1920

2021
StringProperties props = DataAnalyzer.GetStringProperties(_data);
21-
_analyzer = new GPerfAnalyzer(_data.Length, props, new GPerfAnalyzerConfig(), new Simulator(_data.Length, true), NullLogger<GPerfAnalyzer>.Instance);
22+
_analyzer = new GPerfAnalyzer(_data.Length, props, new GPerfAnalyzerConfig(), new Simulator(_data.Length, GeneratorEncoding.UTF16), NullLogger<GPerfAnalyzer>.Instance);
2223
}
2324

2425
[Benchmark]

Src/FastData.Generator.CPlusPlus/Internal/Framework/CPlusPlusLanguageDef.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace Genbox.FastData.Generator.CPlusPlus.Internal.Framework;
55

66
internal class CPlusPlusLanguageDef : ILanguageDef
77
{
8-
public bool UseUTF16Encoding => true;
8+
public GeneratorEncoding Encoding => GeneratorEncoding.UTF16;
99
public string ArraySizeType => "size_t";
1010

1111
public IList<ITypeDef> TypeDefinitions => new List<ITypeDef>

Src/FastData.Generator.CSharp/Internal/Framework/CSharpLanguageDef.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace Genbox.FastData.Generator.CSharp.Internal.Framework;
55

66
internal class CSharpLanguageDef : ILanguageDef
77
{
8-
public bool UseUTF16Encoding => true;
8+
public GeneratorEncoding Encoding => GeneratorEncoding.UTF16;
99
public string ArraySizeType => "uint";
1010

1111
public IList<ITypeDef> TypeDefinitions => new List<ITypeDef>

Src/FastData.Generator.Rust/Internal/Framework/RustLanguageDef.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace Genbox.FastData.Generator.Rust.Internal.Framework;
55

66
internal class RustLanguageDef : ILanguageDef
77
{
8-
public bool UseUTF16Encoding => false;
8+
public GeneratorEncoding Encoding => GeneratorEncoding.UTF8;
99
public string ArraySizeType => "usize";
1010

1111
public IList<ITypeDef> TypeDefinitions => new List<ITypeDef>

Src/FastData.Generator/Framework/CodeGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ protected CodeGenerator(ILanguageDef langDef, IConstantsDef constDef, IEarlyExit
3434

3535
protected SharedCode Shared { get; }
3636

37-
public bool UseUTF16Encoding => _langDef.UseUTF16Encoding;
37+
public GeneratorEncoding Encoding => _langDef.Encoding;
3838

3939
public virtual string Generate<T>(ReadOnlySpan<T> data, GeneratorConfig<T> genCfg, IContext<T> context) where T : notnull
4040
{

Src/FastData.Generator/Framework/Interfaces/ILanguageDef.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
using Genbox.FastData.Enums;
2+
13
namespace Genbox.FastData.Generator.Framework.Interfaces;
24

35
public interface ILanguageDef
46
{
5-
bool UseUTF16Encoding { get; }
7+
GeneratorEncoding Encoding { get; }
68
IList<ITypeDef> TypeDefinitions { get; }
79
string ArraySizeType { get; }
810
}

Src/FastData.InternalShared/DummyGenerator.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using Genbox.FastData.Enums;
12
using Genbox.FastData.Generators;
23
using Genbox.FastData.Generators.Abstracts;
34
using Newtonsoft.Json;
@@ -6,9 +7,9 @@ namespace Genbox.FastData.InternalShared;
67

78
public readonly struct DummyGenerator : ICodeGenerator
89
{
9-
public bool UseUTF16Encoding => true;
10+
public GeneratorEncoding Encoding => GeneratorEncoding.Unknown;
1011

11-
public string Generate<T>(ReadOnlySpan<T> data, GeneratorConfig<T> genCfg, IContext<T> context)
12+
public string Generate<T>(ReadOnlySpan<T> data, GeneratorConfig<T> genCfg, IContext<T> context) where T : notnull
1213
{
1314
Combined<T> combined = new Combined<T>(data.ToArray(), context);
1415
return JsonConvert.SerializeObject(combined, new JsonSerializerSettings { Formatting = Formatting.Indented });

Src/FastData.InternalShared/Helpers/TestHelper.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public static GeneratorSpec Generate<T>(Func<string, ICodeGenerator> func, TestV
107107
props = DataAnalyzer.GetValueProperties(span, dataType);
108108

109109
ICodeGenerator generator = func(vector.Identifier);
110-
bool useUTF16 = generator.UseUTF16Encoding;
110+
GeneratorEncoding encoding = generator.Encoding;
111111

112112
if (vector.Type == typeof(SingleValueStructure<>))
113113
return Generate(generator, vector, props, dataType, StructureType.Auto, new SingleValueStructure<T>());
@@ -120,25 +120,25 @@ public static GeneratorSpec Generate<T>(Func<string, ICodeGenerator> func, TestV
120120
if (vector.Type == typeof(EytzingerSearchStructure<>))
121121
return Generate(generator, vector, props, dataType, StructureType.BinarySearch, new EytzingerSearchStructure<T>(dataType, StringComparison.Ordinal));
122122
if (vector.Type == typeof(HashSetChainStructure<>))
123-
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetChainStructure<T>(GetHashData(vector, dataType, useUTF16), dataType));
123+
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetChainStructure<T>(GetHashData(vector, dataType, encoding), dataType));
124124
if (vector.Type == typeof(HashSetPerfectStructure<>))
125-
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetPerfectStructure<T>(GetHashData(vector, dataType, useUTF16), dataType));
125+
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetPerfectStructure<T>(GetHashData(vector, dataType, encoding), dataType));
126126
if (vector.Type == typeof(HashSetLinearStructure<>))
127-
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetLinearStructure<T>(GetHashData(vector, dataType, useUTF16)));
127+
return Generate(generator, vector, props, dataType, StructureType.HashSet, new HashSetLinearStructure<T>(GetHashData(vector, dataType, encoding)));
128128
if (vector.Type == typeof(KeyLengthStructure<>))
129129
return Generate(generator, vector, props, dataType, StructureType.Auto, new KeyLengthStructure<T>((StringProperties)props));
130130

131131
throw new InvalidOperationException("Unsupported structure type: " + vector.Type.Name);
132132
}
133133

134-
private static HashData GetHashData<T>(TestVector<T> vector, DataType dataType, bool useUTF16) where T : notnull
134+
private static HashData GetHashData<T>(TestVector<T> vector, DataType dataType, GeneratorEncoding genEnc) where T : notnull
135135
{
136136
HashData hashData;
137137

138138
if (dataType == DataType.String)
139139
{
140-
Encoding encoding = useUTF16 ? Encoding.Unicode : Encoding.UTF8;
141-
StringHashFunc func = DefaultStringHash.GetInstance(useUTF16).GetExpression().Compile();
140+
Encoding encoding = genEnc == GeneratorEncoding.UTF8 ? Encoding.UTF8 : Encoding.Unicode;
141+
StringHashFunc func = DefaultStringHash.GetInstance(genEnc).GetExpression().Compile();
142142

143143
hashData = HashData.Create<T>(vector.Values.AsSpan(), 1, obj =>
144144
{
@@ -161,7 +161,7 @@ private static GeneratorSpec Generate<T, TContext>(ICodeGenerator generator, Tes
161161
HashDetails hashDetails = new HashDetails();
162162

163163
if (props is StringProperties stringProps)
164-
genCfg = new GeneratorConfig<T>(structureType, dataType, (uint)vector.Values.Length, stringProps, StringComparison.Ordinal, hashDetails, generator.UseUTF16Encoding);
164+
genCfg = new GeneratorConfig<T>(structureType, dataType, (uint)vector.Values.Length, stringProps, StringComparison.Ordinal, hashDetails, generator.Encoding);
165165
else if (props is ValueProperties<T> valueProps)
166166
{
167167
hashDetails.HasZeroOrNaN = valueProps.HasZeroOrNaN;

Src/FastData.Testbed/Tests/AnalysisTest.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.Diagnostics;
22
using System.Runtime.CompilerServices;
3+
using Genbox.FastData.Enums;
34
using Genbox.FastData.Generators.StringHash;
45
using Genbox.FastData.Internal.Analysis;
56
using Genbox.FastData.Internal.Analysis.Analyzers;
@@ -56,7 +57,7 @@ public static void TestBest()
5657
gcfg.PopulationSize = j;
5758

5859
sw.Restart();
59-
Candidate cand = FastDataGenerator.GetBestHash(Data, props, cfg, NullLoggerFactory.Instance, true, false);
60+
Candidate cand = FastDataGenerator.GetBestHash(Data, props, cfg, NullLoggerFactory.Instance, GeneratorEncoding.UTF16, false);
6061

6162
sw.Stop();
6263
Console.WriteLine($"{i.ToString(),-10}{j.ToString(),-10}{sw.ElapsedMilliseconds,-10:N0}{cand.Collisions,-10:N0}");
@@ -69,7 +70,7 @@ public static void TestNoAnalyzer()
6970
string[] data = RunFunc(Data, 5.0, PrependString).ToArray();
7071
Print(data, "DefaultHash");
7172

72-
Simulator sim = new Simulator(data.Length, true);
73+
Simulator sim = new Simulator(data.Length, GeneratorEncoding.UTF16);
7374
PrintCandidate(sim.Run(data, DefaultStringHash.UTF16Instance));
7475
}
7576

@@ -106,7 +107,7 @@ private static void RunBruteForce(string[] data, [CallerArgumentExpression(nameo
106107

107108
StringProperties props = DataAnalyzer.GetStringProperties(data);
108109
using SerilogLoggerFactory loggerFactory = new SerilogLoggerFactory(_logConf);
109-
BruteForceAnalyzer analyzer = new BruteForceAnalyzer(props, new BruteForceAnalyzerConfig(), new Simulator(data.Length, true), loggerFactory.CreateLogger<BruteForceAnalyzer>());
110+
BruteForceAnalyzer analyzer = new BruteForceAnalyzer(props, new BruteForceAnalyzerConfig(), new Simulator(data.Length, GeneratorEncoding.UTF16), loggerFactory.CreateLogger<BruteForceAnalyzer>());
110111
PrintCandidate(analyzer.GetCandidates(data).OrderByDescending(x => x.Fitness).FirstOrDefault());
111112
}
112113

@@ -116,7 +117,7 @@ private static void RunGeneticAnalysis(string[] data, [CallerArgumentExpression(
116117

117118
StringProperties props = DataAnalyzer.GetStringProperties(data);
118119
using SerilogLoggerFactory loggerFactory = new SerilogLoggerFactory(_logConf);
119-
GeneticAnalyzer analyzer = new GeneticAnalyzer(props, new GeneticAnalyzerConfig(), new Simulator(data.Length, true), loggerFactory.CreateLogger<GeneticAnalyzer>());
120+
GeneticAnalyzer analyzer = new GeneticAnalyzer(props, new GeneticAnalyzerConfig(), new Simulator(data.Length, GeneratorEncoding.UTF16), loggerFactory.CreateLogger<GeneticAnalyzer>());
120121
PrintCandidate(analyzer.GetCandidates(data).OrderByDescending(x => x.Fitness).FirstOrDefault());
121122
}
122123

@@ -126,7 +127,7 @@ private static void RunGPerfAnalysis(string[] data, [CallerArgumentExpression(na
126127

127128
StringProperties props = DataAnalyzer.GetStringProperties(data);
128129
using SerilogLoggerFactory loggerFactory = new SerilogLoggerFactory(_logConf);
129-
GPerfAnalyzer analyzer = new GPerfAnalyzer(data.Length, props, new GPerfAnalyzerConfig(), new Simulator(data.Length, true), loggerFactory.CreateLogger<GPerfAnalyzer>());
130+
GPerfAnalyzer analyzer = new GPerfAnalyzer(data.Length, props, new GPerfAnalyzerConfig(), new Simulator(data.Length, GeneratorEncoding.UTF16), loggerFactory.CreateLogger<GPerfAnalyzer>());
130131
PrintCandidate(analyzer.GetCandidates(data).OrderByDescending(x => x.Fitness).FirstOrDefault());
131132
}
132133

Src/FastData.Testbed/Tests/GPerfTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static void ProduceOutputs(string path)
4444
ReadOnlySpan<string> data = File.ReadAllLines(file).AsSpan();
4545
StringProperties props = DataAnalyzer.GetStringProperties(data);
4646

47-
GPerfAnalyzer analyzer = new GPerfAnalyzer(data.Length, props, new GPerfAnalyzerConfig(), new Simulator(data.Length, true), factory.CreateLogger<GPerfAnalyzer>());
47+
GPerfAnalyzer analyzer = new GPerfAnalyzer(data.Length, props, new GPerfAnalyzerConfig(), new Simulator(data.Length, GeneratorEncoding.UTF16), factory.CreateLogger<GPerfAnalyzer>());
4848
Candidate cand = analyzer.GetCandidates(data).First();
4949
StringHashFunc func = cand.StringHash.GetExpression().Compile();
5050

0 commit comments

Comments
 (0)