Skip to content

Commit be363bc

Browse files
authored
[.NET] Improved parsing time (#336)
* [.NET] Add benchmarks * [.NET] Gherkin.Parser: avoid delegate creation * [.NET] GherkinLine.SplitCells: Avoid string allocations * [.NET] Optimize AstNode.subItems handling * [.NET] Remove old Mono workaround and use string.StartsWith with ordinal everywhere * [.NET] GherkinLine.GetTableCells: avoid string concatenation * [.NET] AstBuilder.GetCells: avoid unnecessary enumerator and array collections * [.NET] GherkinDialectProvider.ParseJsonContent: Use SourceCodeGenerator for System.Text.Json * [.NET] GherkinLine.GetTags: avoid calling RegEx
1 parent acd20c2 commit be363bc

14 files changed

+435
-113
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
1212

1313
### Fixed
1414
- [c] slight update to existing CMakeFiles.txt to propagate VERSION. Close #320 ([#328](https://github.com/cucumber/gherkin/pull/328))
15+
- [.NET] Improved parsing time
16+
- [.NET] Use string-ordinal comparison consistently and remove old Mono workaround
1517

1618
### Changed
1719
- [cpp] add generic support for ABI versioning with VERSION ([#328](https://github.com/cucumber/gherkin/pull/328))
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFrameworks>net8.0;net481</TargetFrameworks>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
12+
<PackageReference Include="System.Collections.Immutable" Version="8.0.0" />
13+
<PackageReference Include="System.Reflection.Metadata" Version="8.0.1" />
14+
</ItemGroup>
15+
16+
<ItemGroup>
17+
<ProjectReference Include="..\Gherkin\Gherkin.csproj" />
18+
</ItemGroup>
19+
20+
</Project>
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
using BenchmarkDotNet.Attributes;
2+
using Gherkin.Ast;
3+
using System.Text;
4+
5+
namespace Gherkin.Benchmarks;
6+
7+
/// <summary>
/// BenchmarkDotNet benchmarks that parse a feature file held in memory,
/// once with a freshly created <see cref="Gherkin.Parser"/> per invocation
/// and once with a parser and token matcher that are reused across invocations.
/// </summary>
public class GherkingParser
{
    /// <summary>
    /// Name of the feature file to parse; resolved against the "good" test data folder
    /// in <see cref="GlobalSetup"/>.
    /// </summary>
    [Params("very_long.feature", "tags.feature")]
    public string? FeatureFile { get; set; }

    // The feature file is copied into memory once so that the benchmarks do not measure disk I/O.
    readonly MemoryStream _TestData = new();
    readonly Parser _ParserReused = new();
    readonly TokenMatcher _TokenMatcher = new();
    StreamReader? _Reader;

    /// <summary>
    /// Loads the selected feature file into <c>_TestData</c> and creates the reader
    /// that every benchmark invocation shares.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var fullPathToTestFeatureFile = Path.Combine(TestFileProvider.GetTestFileFolder("good"), FeatureFile!);

        using var fileStream = new FileStream(fullPathToTestFeatureFile, FileMode.Open, FileAccess.Read);

        fileStream.CopyTo(_TestData);

        // leaveOpen: true keeps _TestData usable independently of the reader's lifetime.
        _Reader = new StreamReader(_TestData, Encoding.UTF8, false, 4096, true);
    }

    /// <summary>Parses the feature file with a newly created parser.</summary>
    /// <returns>The parsed document, returned so the work cannot be optimized away.</returns>
    [Benchmark]
    public GherkinDocument Parser()
    {
        var parser = new Parser();
        return parser.Parse(RewindAndCreateScanner());
    }

    /// <summary>Parses the feature file with the shared parser and token matcher.</summary>
    /// <returns>The parsed document, returned so the work cannot be optimized away.</returns>
    [Benchmark]
    public GherkinDocument ParserReuse()
    {
        return _ParserReused.Parse(RewindAndCreateScanner(), _TokenMatcher);
    }

    /// <summary>
    /// Rewinds the in-memory test data and returns a scanner over the shared reader.
    /// </summary>
    private TokenScanner RewindAndCreateScanner()
    {
        var reader = _Reader ?? throw new InvalidOperationException("GlobalSetup must run before the benchmarks.");

        _TestData.Seek(0, SeekOrigin.Begin);

        // The reader buffers data from the stream; after repositioning the stream its
        // buffer must be dropped, otherwise stale characters could be served if a
        // previous invocation did not read exactly up to the end of the data.
        reader.DiscardBufferedData();

        return new TokenScanner(reader);
    }
}

dotnet/Gherkin.Benchmarks/Program.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using BenchmarkDotNet.Configs;
2+
using BenchmarkDotNet.Diagnosers;
3+
using BenchmarkDotNet.Environments;
4+
using BenchmarkDotNet.Jobs;
5+
using BenchmarkDotNet.Running;
6+
7+
namespace Gherkin.Benchmarks;
8+
9+
/// <summary>
/// Entry point of the benchmark executable: runs the <see cref="GherkingParser"/> benchmarks.
/// </summary>
internal class Program
{
    static void Main(string[] args)
    {
        _ = BenchmarkRunner.Run<GherkingParser>(CreateConfig());
    }

    /// <summary>
    /// Builds the BenchmarkDotNet configuration. DEBUG builds use the in-process debug
    /// configuration; other builds add one job for .NET 8.0 and one for .NET Framework 4.8.1.
    /// The memory diagnoser is attached in both cases.
    /// </summary>
    static IConfig CreateConfig()
    {
#if DEBUG
        IConfig baseConfig = new DebugInProcessConfig();
#else
        IConfig baseConfig = DefaultConfig.Instance
            .AddJob(Job.Default.WithRuntime(CoreRuntime.Core80))
            .AddJob(Job.Default.WithRuntime(ClrRuntime.Net481));
#endif
        return baseConfig.AddDiagnoser(MemoryDiagnoser.Default);
    }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
namespace Gherkin.Benchmarks;
2+
3+
/// <summary>
/// Locates the shared <c>testdata</c> folders relative to the current working directory.
/// </summary>
public class TestFileProvider
{
    /// <summary>
    /// Returns the absolute path of the <c>testdata/&lt;category&gt;</c> folder.
    /// </summary>
    /// <param name="category">Sub-folder of <c>testdata</c> to resolve, e.g. "good".</param>
    /// <returns>The full path obtained by walking up from <see cref="Environment.CurrentDirectory"/>.</returns>
    public static string GetTestFileFolder(string category)
    {
        var startFolder = Environment.CurrentDirectory;
#if !DEBUG && NET6_0_OR_GREATER
        // Outside DEBUG builds on .NET 6+ the working directory sits four levels deeper.
        // In DEBUG builds artefacts are not created in subdirectories, so no extra hops are needed.
        startFolder = Path.Combine(startFolder, "..", "..", "..", "..");
#endif
        var relativeTestData = Path.Combine("..", "..", "..", "..", "..", "testdata", category);
        return Path.GetFullPath(Path.Combine(startFolder, relativeTestData));
    }
}

dotnet/Gherkin.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
1818
Makefile = Makefile
1919
EndProjectSection
2020
EndProject
21+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Gherkin.Benchmarks", "Gherkin.Benchmarks\Gherkin.Benchmarks.csproj", "{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}"
22+
EndProject
2123
Global
2224
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2325
Debug|Any CPU = Debug|Any CPU
@@ -32,6 +34,10 @@ Global
3234
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Debug|Any CPU.Build.0 = Debug|Any CPU
3335
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.ActiveCfg = Release|Any CPU
3436
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.Build.0 = Release|Any CPU
37+
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
38+
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.Build.0 = Debug|Any CPU
39+
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.ActiveCfg = Release|Any CPU
40+
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.Build.0 = Release|Any CPU
3541
EndGlobalSection
3642
GlobalSection(SolutionProperties) = preSolution
3743
HideSolutionNode = FALSE

dotnet/Gherkin/AstBuilder.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,9 @@ protected virtual void CheckCellCountConsistency(TableRow[] rows)
279279
return;
280280

281281
int cellCount = rows[0].Cells.Count();
282-
foreach (var row in rows)
282+
for (int i = 1; i < rows.Length; i++)
283283
{
284+
var row = rows[i];
284285
if (row.Cells.Count() != cellCount)
285286
{
286287
HandleAstError("inconsistent cell count within the table", row.Location);
@@ -295,9 +296,13 @@ protected virtual void HandleAstError(string message, Location location)
295296

296297
private TableCell[] GetCells(Token tableRowToken)
297298
{
298-
return tableRowToken.MatchedItems
299-
.Select(cellItem => CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text))
300-
.ToArray();
299+
var cells = new TableCell[tableRowToken.MatchedItems.Length];
300+
for (int i = 0; i < cells.Length; i++)
301+
{
302+
var cellItem = tableRowToken.MatchedItems[i];
303+
cells[i] = CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text);
304+
}
305+
return cells;
301306
}
302307

303308
private static Step[] GetSteps(AstNode scenarioDefinitionNode)

dotnet/Gherkin/AstNode.cs

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ namespace Gherkin;
22

33
public class AstNode(RuleType ruleType)
44
{
5-
private readonly Dictionary<RuleType, IList<object>> subItems = new Dictionary<RuleType, IList<object>>();
5+
private readonly Dictionary<RuleType, object> subItems = new Dictionary<RuleType, object>();
66

77
public RuleType RuleType { get; } = ruleType;
88

@@ -18,17 +18,50 @@ public IEnumerable<Token> GetTokens(TokenType tokenType)
1818

1919
public T GetSingle<T>(RuleType ruleType)
2020
{
21-
return GetItems<T>(ruleType).SingleOrDefault();
21+
if (!subItems.TryGetValue(ruleType, out var items))
22+
return default;
23+
if (items is List<object> list)
24+
{
25+
T ret = default;
26+
bool foundOne = false;
27+
foreach (var item in list)
28+
{
29+
if (item is T tItem)
30+
{
31+
if (foundOne)
32+
throw new InvalidOperationException();
33+
ret = tItem;
34+
foundOne = true;
35+
}
36+
}
37+
if (foundOne)
38+
return ret;
39+
else
40+
throw new InvalidOperationException();
41+
}
42+
else if (items is T tItem)
43+
{
44+
return tItem;
45+
}
46+
return default;
2247
}
2348

2449
public IEnumerable<T> GetItems<T>(RuleType ruleType)
2550
{
26-
IList<object> items;
27-
if (!subItems.TryGetValue(ruleType, out items))
51+
if (!subItems.TryGetValue(ruleType, out var items))
52+
yield break;
53+
if (items is List<object> list)
54+
{
55+
foreach (var item in list)
56+
{
57+
if (item is T tItem)
58+
yield return tItem;
59+
}
60+
}
61+
else if (items is T tItem)
2862
{
29-
return Enumerable.Empty<T>();
63+
yield return tItem;
3064
}
31-
return items.Cast<T>();
3265
}
3366

3467
public void SetSingle<T>(RuleType ruleType, T value)
@@ -46,12 +79,18 @@ public void AddRange<T>(RuleType ruleType, IEnumerable<T> values)
4679

4780
public void Add<T>(RuleType ruleType, T obj)
4881
{
49-
IList<object> items;
50-
if (!subItems.TryGetValue(ruleType, out items))
82+
if (!subItems.TryGetValue(ruleType, out var items))
83+
{
84+
subItems.Add(ruleType, obj);
85+
}
86+
else if (items is List<object> list)
87+
{
88+
list.Add(obj);
89+
}
90+
else
5191
{
52-
items = new List<object>();
53-
subItems.Add(ruleType, items);
92+
list = [items, obj];
93+
subItems[ruleType] = list;
5494
}
55-
items.Add(obj);
5695
}
5796
}

dotnet/Gherkin/GherkinDialectProvider.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using Gherkin.Ast;
22
using System.Text.Json;
3+
using System.Text.Json.Serialization;
34

45
namespace Gherkin;
56

@@ -52,7 +53,7 @@ protected virtual Dictionary<string, GherkinLanguageSetting> LoadLanguageSetting
5253

5354
protected virtual Dictionary<string, GherkinLanguageSetting> ParseJsonContent(string languagesFileContent)
5455
{
55-
return JsonSerializer.Deserialize<Dictionary<string, GherkinLanguageSetting>>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web));
56+
return JsonSerializer.Deserialize<Dictionary<string, GherkinLanguageSetting>>(languagesFileContent, new JsonSerializerOptions(JsonSerializerDefaults.Web) { TypeInfoResolver = SourceGenerationContext.Default });
5657
}
5758

5859
protected virtual bool TryGetDialect(string language, Dictionary<string, GherkinLanguageSetting> gherkinLanguageSettings, Location location, out GherkinDialect dialect)
@@ -113,6 +114,12 @@ protected static GherkinDialect GetFactoryDefault()
113114
}
114115
}
115116

117+
[JsonSourceGenerationOptions]
118+
[JsonSerializable(typeof(Dictionary<string, GherkinLanguageSetting>))]
119+
internal partial class SourceGenerationContext : JsonSerializerContext
120+
{
121+
}
122+
116123
public class GherkinLanguageSetting
117124
{
118125
public string Name { get; set; }

dotnet/Gherkin/GherkinLanguageConstants.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ public static class GherkinLanguageConstants
66
public const string COMMENT_PREFIX = "#";
77
public const string TITLE_KEYWORD_SEPARATOR = ":";
88
public const string TABLE_CELL_SEPARATOR = "|";
9+
public const char TABLE_CELL_SEPARATOR_CHAR = '|';
910
public const char TABLE_CELL_ESCAPE_CHAR = '\\';
1011
public const char TABLE_CELL_NEWLINE_ESCAPE = 'n';
1112
public const string DOCSTRING_SEPARATOR = "\"\"\"";

0 commit comments

Comments
 (0)