Skip to content

Commit b78aba4

Browse files
committed
New template tokenizer WIP
1 parent 62df7a9 commit b78aba4

File tree

5 files changed

+257
-33
lines changed

5 files changed

+257
-33
lines changed

src/Serilog.Expressions/Expressions/Parsing/ExpressionToken.cs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,18 @@ enum ExpressionToken
127127
Else,
128128

129129
[Token(Category = "keyword", Example = "ci")]
130-
CI
130+
CI,
131+
132+
// Template syntax
133+
134+
[Token(Description = "text")]
135+
Text,
136+
137+
138+
[Token(Example = "{{")]
139+
LBraceEscape,
140+
141+
[Token(Example = "}}")]
142+
RBraceEscape,
131143
}
132144
}

src/Serilog.Expressions/Expressions/Parsing/ExpressionTokenizer.cs

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ namespace Serilog.Expressions.Parsing
77
{
88
class ExpressionTokenizer : Tokenizer<ExpressionToken>
99
{
10-
static readonly ExpressionToken[] SingleCharOps = new ExpressionToken[128];
10+
readonly ExpressionToken[] _singleCharOps = new ExpressionToken[128];
1111

12-
static readonly ExpressionKeyword[] Keywords =
12+
readonly ExpressionKeyword[] _keywords =
1313
{
1414
new ExpressionKeyword("and", ExpressionToken.And),
1515
new ExpressionKeyword("in", ExpressionToken.In),
@@ -26,28 +26,28 @@ class ExpressionTokenizer : Tokenizer<ExpressionToken>
2626
new ExpressionKeyword("ci", ExpressionToken.CI)
2727
};
2828

29-
static ExpressionTokenizer()
29+
public ExpressionTokenizer()
3030
{
31-
SingleCharOps['+'] = ExpressionToken.Plus;
32-
SingleCharOps['-'] = ExpressionToken.Minus;
33-
SingleCharOps['*'] = ExpressionToken.Asterisk;
34-
SingleCharOps['/'] = ExpressionToken.ForwardSlash;
35-
SingleCharOps['%'] = ExpressionToken.Percent;
36-
SingleCharOps['^'] = ExpressionToken.Caret;
37-
SingleCharOps['<'] = ExpressionToken.LessThan;
38-
SingleCharOps['>'] = ExpressionToken.GreaterThan;
39-
SingleCharOps['='] = ExpressionToken.Equal;
40-
SingleCharOps[','] = ExpressionToken.Comma;
41-
SingleCharOps['.'] = ExpressionToken.Period;
42-
SingleCharOps['('] = ExpressionToken.LParen;
43-
SingleCharOps[')'] = ExpressionToken.RParen;
44-
SingleCharOps['{'] = ExpressionToken.LBrace;
45-
SingleCharOps['}'] = ExpressionToken.RBrace;
46-
SingleCharOps[':'] = ExpressionToken.Colon;
47-
SingleCharOps['['] = ExpressionToken.LBracket;
48-
SingleCharOps[']'] = ExpressionToken.RBracket;
49-
SingleCharOps['*'] = ExpressionToken.Asterisk;
50-
SingleCharOps['?'] = ExpressionToken.QuestionMark;
31+
_singleCharOps['+'] = ExpressionToken.Plus;
32+
_singleCharOps['-'] = ExpressionToken.Minus;
33+
_singleCharOps['*'] = ExpressionToken.Asterisk;
34+
_singleCharOps['/'] = ExpressionToken.ForwardSlash;
35+
_singleCharOps['%'] = ExpressionToken.Percent;
36+
_singleCharOps['^'] = ExpressionToken.Caret;
37+
_singleCharOps['<'] = ExpressionToken.LessThan;
38+
_singleCharOps['>'] = ExpressionToken.GreaterThan;
39+
_singleCharOps['='] = ExpressionToken.Equal;
40+
_singleCharOps[','] = ExpressionToken.Comma;
41+
_singleCharOps['.'] = ExpressionToken.Period;
42+
_singleCharOps['('] = ExpressionToken.LParen;
43+
_singleCharOps[')'] = ExpressionToken.RParen;
44+
_singleCharOps['{'] = ExpressionToken.LBrace;
45+
_singleCharOps['}'] = ExpressionToken.RBrace;
46+
_singleCharOps[':'] = ExpressionToken.Colon;
47+
_singleCharOps['['] = ExpressionToken.LBracket;
48+
_singleCharOps[']'] = ExpressionToken.RBracket;
49+
_singleCharOps['*'] = ExpressionToken.Asterisk;
50+
_singleCharOps['?'] = ExpressionToken.QuestionMark;
5151
}
5252

5353
public TokenList<ExpressionToken> GreedyTokenize(TextSpan textSpan)
@@ -60,6 +60,11 @@ public TokenList<ExpressionToken> GreedyTokenize(TextSpan textSpan)
6060
.ToArray());
6161
}
6262

63+
public IEnumerable<Result<ExpressionToken>> LazyTokenize(TextSpan span)
64+
{
65+
return Tokenize(span);
66+
}
67+
6368
protected override IEnumerable<Result<ExpressionToken>> Tokenize(TextSpan stringSpan)
6469
{
6570
var next = SkipWhiteSpace(stringSpan);
@@ -147,9 +152,9 @@ protected override IEnumerable<Result<ExpressionToken>> Tokenize(TextSpan string
147152
yield return Result.Value(compoundOp.Value, compoundOp.Location, compoundOp.Remainder);
148153
next = compoundOp.Remainder.ConsumeChar();
149154
}
150-
else if (next.Value < SingleCharOps.Length && SingleCharOps[next.Value] != ExpressionToken.None)
155+
else if (next.Value < _singleCharOps.Length && _singleCharOps[next.Value] != ExpressionToken.None)
151156
{
152-
yield return Result.Value(SingleCharOps[next.Value], next.Location, next.Remainder);
157+
yield return Result.Value(_singleCharOps[next.Value], next.Location, next.Remainder);
153158
next = next.Remainder.ConsumeChar();
154159
}
155160
else
@@ -163,16 +168,16 @@ protected override IEnumerable<Result<ExpressionToken>> Tokenize(TextSpan string
163168
} while (next.HasValue);
164169
}
165170

166-
static bool IsDelimiter(Result<char> next)
171+
bool IsDelimiter(Result<char> next)
167172
{
168173
return !next.HasValue ||
169174
char.IsWhiteSpace(next.Value) ||
170-
next.Value < SingleCharOps.Length && SingleCharOps[next.Value] != ExpressionToken.None;
175+
next.Value < _singleCharOps.Length && _singleCharOps[next.Value] != ExpressionToken.None;
171176
}
172177

173-
static bool TryGetKeyword(TextSpan span, out ExpressionToken keyword)
178+
bool TryGetKeyword(TextSpan span, out ExpressionToken keyword)
174179
{
175-
foreach (var kw in Keywords)
180+
foreach (var kw in _keywords)
176181
{
177182
if (span.EqualsValueIgnoreCase(kw.Text))
178183
{

src/Serilog.Expressions/Templates/Parsing/TemplateParser.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ namespace Serilog.Templates.Parsing
1313
{
1414
static class TemplateParser
1515
{
16-
static ExpressionTokenizer Tokenizer { get; } = new ExpressionTokenizer();
17-
1816
public static bool TryParse(
1917
string template,
2018
[MaybeNullWhen(false)] out Template parsed,
2119
[MaybeNullWhen(true)] out string error)
2220
{
2321
if (template == null) throw new ArgumentNullException(nameof(template));
2422

23+
var tokenizer = new ExpressionTokenizer();
24+
2525
parsed = null;
2626
var elements = new List<Template>();
2727

@@ -47,7 +47,7 @@ public static bool TryParse(
4747
else
4848
{
4949
// No line/column tracking
50-
var tokens = Tokenizer.GreedyTokenize(new TextSpan(template, new Position(i, 0, 0), template.Length - i));
50+
var tokens = tokenizer.GreedyTokenize(new TextSpan(template, new Position(i, 0, 0), template.Length - i));
5151
var expr = ExpressionTokenParsers.TryPartialParse(tokens);
5252
if (!expr.HasValue)
5353
{
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Serilog.Expressions.Parsing;
4+
using Superpower;
5+
using Superpower.Model;
6+
7+
namespace Serilog.Templates.Parsing
8+
{
9+
// Tokenizes a template string into literal text, escaped braces (`{{` and `}}`), and,
// for each `{...}` hole, the expression tokens produced by ExpressionTokenizer.
// Work in progress: alignment/format sections of a hole are not tokenized yet.
class TemplateTokenizer : Tokenizer<ExpressionToken>
{
    // Used lazily so that tokenization of a hole can stop at its closing `}` without
    // the expression tokenizer consuming the literal text that follows.
    readonly ExpressionTokenizer _expressionTokenizer = new ExpressionTokenizer();

    // Scans `span` character by character. Runs of ordinary characters are emitted as a single
    // Text token; `{{`/`}}` are emitted as escape tokens; a lone `{` opens a hole whose content
    // is delegated to TokenizeHole; a lone `}` outside a hole is reported as an error.
    protected override IEnumerable<Result<ExpressionToken>> Tokenize(TextSpan span)
    {
        // `start` marks the beginning of the pending literal text run, `rem` the scan position.
        var start = span;
        var rem = start;
        do
        {
            var next = rem.ConsumeChar();
            if (!next.HasValue)
            {
                // End of input: flush any pending literal text.
                if (rem != start)
                    yield return Result.Value(ExpressionToken.Text, start, rem);

                yield break;
            }

            if (next.Value == '{')
            {
                // Flush the text that precedes the brace.
                if (rem != start)
                    yield return Result.Value(ExpressionToken.Text, start, rem);

                var peek = next.Remainder.ConsumeChar();
                if (peek.HasValue && peek.Value == '{')
                {
                    yield return Result.Value(ExpressionToken.LBraceEscape, next.Location, peek.Remainder);
                    start = rem = peek.Remainder;
                }
                else
                {
                    yield return Result.Value(ExpressionToken.LBrace, next.Location, next.Remainder);
                    start = rem = next.Remainder;

                    foreach (var token in TokenizeHole(rem))
                    {
                        yield return token;

                        // A failed result ends tokenization, matching the stray `}` case below;
                        // resuming literal-text scanning from an error position would only
                        // produce misleading follow-on tokens.
                        if (!token.HasValue)
                            yield break;

                        start = rem = token.Remainder;
                    }
                }
            }
            else if (next.Value == '}')
            {
                // Flush the text that precedes the brace.
                if (rem != start)
                    yield return Result.Value(ExpressionToken.Text, start, rem);

                var peek = next.Remainder.ConsumeChar();
                if (peek.HasValue && peek.Value == '}')
                {
                    yield return Result.Value(ExpressionToken.RBraceEscape, next.Location, peek.Remainder);
                    start = rem = peek.Remainder;
                }
                else
                {
                    // Outside a hole, `}` is only valid as the first half of `}}`.
                    yield return Result.Empty<ExpressionToken>(next.Remainder, new[] {"escaped `}`"});
                    yield break;
                }
            }
            else
            {
                // Ordinary character: extend the pending literal text run.
                rem = next.Remainder;
            }
        } while (true);
    }

    // Yields the expression tokens of a hole, starting just after its opening `{`.
    //
    // Braces, brackets, and parens are tracked on a stack:
    //  * If we hit `,` or `:` while the stack is empty, the intent is to switch into
    //    alignment/width tokenization; this is not implemented yet and throws.
    //  * If we hit `}` while the stack is empty, it is yielded as the hole's final token and
    //    the caller returns to literal text mode.
    //  * If a closing token does not match the most recent opening token, it is yielded and
    //    tokenization of the hole stops there.
    IEnumerable<Result<ExpressionToken>> TokenizeHole(TextSpan span)
    {
        var toMatch = new Stack<ExpressionToken>();

        foreach (var token in _expressionTokenizer.LazyTokenize(span))
        {
            yield return token;

            // Errors from the expression tokenizer are passed through, then the hole ends.
            if (!token.HasValue)
                yield break;

            if (token.Value == ExpressionToken.LParen ||
                token.Value == ExpressionToken.LBrace ||
                token.Value == ExpressionToken.LBracket)
            {
                toMatch.Push(token.Value);
            }
            else if (toMatch.Count > 0)
            {
                // Inside a nested group, only closing tokens matter; anything else
                // (including `,` and `:`) belongs to the nested expression.
                if (token.Value == ExpressionToken.RParen)
                {
                    if (toMatch.Peek() != ExpressionToken.LParen)
                        yield break;

                    toMatch.Pop();
                }
                else if (token.Value == ExpressionToken.RBrace)
                {
                    if (toMatch.Peek() != ExpressionToken.LBrace)
                        yield break;

                    toMatch.Pop();
                }
                else if (token.Value == ExpressionToken.RBracket)
                {
                    if (toMatch.Peek() != ExpressionToken.LBracket)
                        yield break;

                    toMatch.Pop();
                }
            }
            else if (token.Value == ExpressionToken.RBrace)
            {
                // Closing brace of the hole itself.
                yield break;
            }
            else if (token.Value == ExpressionToken.Comma ||
                     token.Value == ExpressionToken.Colon)
            {
                throw new NotImplementedException("Tokenize alignment/format.");
            }
        }
    }
}
138+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
using Serilog.Expressions.Parsing;
4+
using Serilog.Templates.Parsing;
5+
using Xunit;
6+
7+
using static Serilog.Expressions.Parsing.ExpressionToken;
8+
9+
namespace Serilog.Expressions.Tests
10+
{
11+
// Exercises TemplateTokenizer against templates that should tokenize cleanly and
// templates that should be rejected with a specific error message fragment.
public class TemplateTokenizerTests
{
    // Each case pairs a template with the token kinds it is expected to produce, in order.
    public static IEnumerable<object[]> ValidCases
    {
        get
        {
            yield return Case("aa", Text);
            yield return Case("{bb}", LBrace, Identifier, RBrace);
            yield return Case("aa{bb}", Text, LBrace, Identifier, RBrace);
            yield return Case("aa{{bb}}", Text, LBraceEscape, Text, RBraceEscape);
            yield return Case("{ {b: b} }c", LBrace, LBrace, Identifier, Colon, Identifier, RBrace, RBrace, Text);
        }
    }

    // Packs a template and its expected token kinds into the argument row shape used by MemberData.
    static object[] Case(string template, params ExpressionToken[] kinds)
    {
        return new object[] {template, kinds};
    }

    [Theory]
    [MemberData(nameof(ValidCases))]
    public void ValidTemplatesAreTokenized(string template, object expected)
    {
        var expectedTokens = (ExpressionToken[]) expected;

        var kinds =
            from token in new TemplateTokenizer().Tokenize(template)
            select token.Kind;

        Assert.Equal(expectedTokens, kinds);
    }

    [Theory]
    [InlineData("aa{{bb}", "unexpected end of input, expected escaped `}`")]
    [InlineData("aa{ {b: 'b} }", "unexpected end of input, expected `'`")]
    public void InvalidTemplatesAreReported(string template, string fragment)
    {
        var result = new TemplateTokenizer().TryTokenize(template);

        Assert.False(result.HasValue);
        Assert.Equal(fragment, result.FormatErrorMessageFragment());
    }
}
69+
}

0 commit comments

Comments
 (0)