Skip to content

Commit 619abd7

Browse files
authored
lexer optimizing refactoring (#211)
1 parent 3e0aec7 commit 619abd7

File tree

6 files changed

+21
-68
lines changed

src/Domain/HydraScript.Domain.FrontEnd/Lexer/ILexer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@ public interface ILexer
44
{
55
public IStructure Structure { get; }
66

7-
public List<Token> GetTokens(string text);
7+
public IEnumerable<Token> GetTokens(string text);
88
}

src/Domain/HydraScript.Domain.FrontEnd/Lexer/Impl/RegexLexer.cs

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,16 @@
1-
using System.Collections;
2-
using System.Diagnostics.CodeAnalysis;
31
using System.Text.RegularExpressions;
4-
using Cysharp.Text;
52

63
namespace HydraScript.Domain.FrontEnd.Lexer.Impl;
74

8-
public class RegexLexer(IStructure structure, ITextCoordinateSystemComputer computer) : ILexer, IEnumerable<Token>
5+
public class RegexLexer(IStructure structure, ITextCoordinateSystemComputer computer) : ILexer
96
{
10-
private IReadOnlyList<int> _lines = [];
11-
private string _text = "";
12-
137
public IStructure Structure { get; } = structure;
148

15-
public List<Token> GetTokens(string text)
9+
public IEnumerable<Token> GetTokens(string text)
1610
{
17-
_text = text;
18-
_lines = computer.GetLines(_text);
19-
20-
return this.ToList();
21-
}
11+
var lines = computer.GetLines(text);
2212

23-
public IEnumerator<Token> GetEnumerator()
24-
{
25-
foreach (Match match in Structure.Regex.Matches(_text))
13+
foreach (Match match in Structure.Regex.Matches(text))
2614
{
2715
for (var i = 0; i < Structure.Count; i++)
2816
{
@@ -33,8 +21,8 @@ public IEnumerator<Token> GetEnumerator()
3321

3422
var value = group.Value;
3523
var segment = new Segment(
36-
computer.GetCoordinates(group.Index, _lines),
37-
computer.GetCoordinates(absoluteIndex: group.Index + group.Length, _lines));
24+
computer.GetCoordinates(group.Index, lines),
25+
computer.GetCoordinates(absoluteIndex: group.Index + group.Length, lines));
3826
var token = new Token(type, segment, value);
3927

4028
if (type.Error()) throw new LexerException(token);
@@ -45,9 +33,4 @@ public IEnumerator<Token> GetEnumerator()
4533

4634
yield return new EndToken();
4735
}
48-
49-
[ExcludeFromCodeCoverage]
50-
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
51-
52-
public override string ToString() => ZString.Join<Token>('\n', this);
5336
}

src/Domain/HydraScript.Domain.FrontEnd/Parser/Impl/TokensStream.cs

Lines changed: 0 additions & 28 deletions
This file was deleted.

src/Domain/HydraScript.Domain.FrontEnd/Parser/Impl/TopDownParser.cs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,14 @@
1414

1515
namespace HydraScript.Domain.FrontEnd.Parser.Impl;
1616

17-
public class TopDownParser : IParser
17+
public class TopDownParser(ILexer lexer) : IParser
1818
{
19-
private TokensStream _tokens = new List<Token>();
20-
private readonly ILexer _lexer;
21-
22-
public TopDownParser(ILexer lexer) =>
23-
_lexer = lexer;
19+
private IEnumerator<Token> _tokens = Enumerable.Empty<Token>().GetEnumerator();
2420

2521
public IAbstractSyntaxTree Parse(string text)
2622
{
27-
_tokens = _lexer.GetTokens(text);
23+
_tokens = lexer.GetTokens(text).GetEnumerator();
24+
_tokens.MoveNext();
2825

2926
var root = Script();
3027
Expect(Eop.Tag);
@@ -45,7 +42,7 @@ private Token Expect(string expectedTag, string? expectedValue = null)
4542
}
4643

4744
private bool CurrentIs(string tag) =>
48-
_tokens.Current.Type == _lexer.Structure.FindByTag(tag);
45+
_tokens.Current.Type == lexer.Structure.FindByTag(tag);
4946

5047
private bool CurrentIsLiteral() =>
5148
CurrentIs("NullLiteral") ||

src/Infrastructure/HydraScript.Infrastructure/Dumping/DumpingLexer.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Diagnostics.CodeAnalysis;
2+
using Cysharp.Text;
23
using HydraScript.Domain.FrontEnd.Lexer;
34
using Microsoft.Extensions.DependencyInjection;
45

@@ -12,10 +13,10 @@ internal class DumpingLexer(
1213
[ExcludeFromCodeCoverage]
1314
public IStructure Structure => lexer.Structure;
1415

15-
public List<Token> GetTokens(string text)
16+
public IEnumerable<Token> GetTokens(string text)
1617
{
17-
var tokens = lexer.GetTokens(text);
18-
dumpingService.Dump(lexer.ToString(), "tokens");
18+
var tokens = lexer.GetTokens(text).ToList();
19+
dumpingService.Dump(ZString.Join('\n', tokens), "tokens");
1920
return tokens;
2021
}
2122
}

tests/HydraScript.UnitTests/Domain/FrontEnd/RegexLexerTests.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ public void LexerDoesNotThrowTest(string text) =>
2020
[Theory]
2121
[ClassData(typeof(LexerFailData))]
2222
public void LexerThrowsErrorTest(string text) =>
23-
Assert.Throws<LexerException>(() => _regexLexer.GetTokens(text));
23+
Assert.Throws<LexerException>(() => _regexLexer.GetTokens(text).ToList());
2424

2525
[Fact]
2626
public void LexerToStringCorrectTest()
2727
{
2828
const string text = "8";
29-
var tokens = _regexLexer.GetTokens(text);
30-
Assert.Contains("EOP", _regexLexer.ToString());
31-
Assert.Equal("IntegerLiteral (1, 1)-(1, 2): 8", tokens.First().ToString());
29+
var tokens = _regexLexer.GetTokens(text).ToList();
30+
Assert.Contains("EOP", tokens[^1].ToString());
31+
Assert.Equal("IntegerLiteral (1, 1)-(1, 2): 8", tokens[0].ToString());
3232
}
3333

3434
[Fact]
@@ -80,7 +80,7 @@ public void GetTokens_MockedRegex_ValidOutput(
8080
structure.Count.Returns(tokenTypes.Count);
8181
structure[Arg.Any<int>()].Returns(callInfo => tokenTypes[callInfo.Arg<int>()]);
8282

83-
var tokens = lexer.GetTokens(input.ToString());
83+
var tokens = lexer.GetTokens(input.ToString()).ToList();
8484
for (var i = 0; i < input.Count; i++)
8585
{
8686
output.WriteLine(tokens[i].ToString());

0 commit comments

Comments
 (0)