Skip to content

Commit 6323232

Browse files
committed
разделение литералов/слов на типы и рефакторинг лексера
1 parent f3bdb66 commit 6323232

File tree

2 files changed

+112
-108
lines changed

2 files changed

+112
-108
lines changed

src/OneScript.Language/LanguageDef.cs

Lines changed: 41 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,11 @@ This Source Code Form is subject to the terms of the
55
at http://mozilla.org/MPL/2.0/.
66
----------------------------------------------------------*/
77

8+
using OneScript.Language.LexicalAnalysis;
89
using System;
910
using System.Collections.Generic;
1011
using System.Runtime.CompilerServices;
11-
using OneScript.Language.LexicalAnalysis;
12+
using static OneScript.Language.LanguageDef;
1213

1314
namespace OneScript.Language
1415
{
@@ -21,14 +22,20 @@ public static class LanguageDef
2122

2223
private static readonly IdentifiersTrie<Token> _stringToToken = new IdentifiersTrie<Token>();
2324

24-
private static readonly IdentifiersTrie<bool> _undefined = new IdentifiersTrie<bool>();
25-
private static readonly IdentifiersTrie<bool> _booleans = new IdentifiersTrie<bool>();
26-
private static readonly IdentifiersTrie<bool> _logicalOp = new IdentifiersTrie<bool>();
27-
28-
private static readonly IdentifiersTrie<bool> _preprocImport = new IdentifiersTrie<bool>();
29-
3025
const int BUILTINS_INDEX = (int)Token.ByValParam;
3126

27+
/// <summary>
/// Category of a reserved word registered in the special-words trie.
/// Each registered word maps to exactly one of these values.
/// </summary>
public enum WordType
28+
{
29+
/// <summary>The undefined literal ("Undefined" and its Russian spelling).</summary>
Undefined,
30+
/// <summary>A boolean literal ("True", "False" and their Russian spellings).</summary>
Boolean,
31+
/// <summary>A logical operator word ("And", "Or", "Not" and their Russian spellings).</summary>
Logical,
32+
/// <summary>The null literal ("NULL").</summary>
Null,
33+
/// <summary>The import preprocessor directive word ("Use" and its Russian spelling).</summary>
Preproc,
34+
/// <summary>Not a special word; returned by GetWordType when the lookup fails.</summary>
None
35+
};
36+
37+
private static readonly IdentifiersTrie<WordType> _specwords = new IdentifiersTrie<WordType>();
38+
3239
static LanguageDef()
3340
{
3441
_priority.Add(Token.Plus, 5);
@@ -52,21 +59,26 @@ static LanguageDef()
5259

5360
#region constants
5461

55-
_undefined.Add("Undefined", true);
56-
_undefined.Add("Неопределено", true);
62+
_specwords.Add("Undefined", WordType.Undefined);
63+
_specwords.Add("Неопределено", WordType.Undefined);
5764

58-
_booleans.Add("True", true);
59-
_booleans.Add("False", true);
60-
_booleans.Add("Истина", true);
61-
_booleans.Add("Ложь", true);
65+
_specwords.Add("True", WordType.Boolean);
66+
_specwords.Add("False", WordType.Boolean);
67+
_specwords.Add("Истина", WordType.Boolean);
68+
_specwords.Add("Ложь", WordType.Boolean);
6269

63-
_logicalOp.Add("And", true);
64-
_logicalOp.Add("Or", true);
65-
_logicalOp.Add("Not", true);
70+
_specwords.Add("And", WordType.Logical);
71+
_specwords.Add("Or", WordType.Logical);
72+
_specwords.Add("Not", WordType.Logical);
6673

67-
_logicalOp.Add("И", true);
68-
_logicalOp.Add("ИЛИ", true);
69-
_logicalOp.Add("НЕ", true);
74+
_specwords.Add("И", WordType.Logical);
75+
_specwords.Add("ИЛИ", WordType.Logical);
76+
_specwords.Add("НЕ", WordType.Logical);
77+
78+
_specwords.Add("NULL", WordType.Null);
79+
80+
_specwords.Add("Использовать", WordType.Preproc);
81+
_specwords.Add("Use", WordType.Preproc);
7082

7183
#endregion
7284

@@ -216,8 +228,6 @@ static LanguageDef()
216228

217229
#endregion
218230

219-
_preprocImport.Add("Использовать", true);
220-
_preprocImport.Add("Use", true);
221231
}
222232

223233
private static void AddToken(Token token, string name)
@@ -433,18 +443,24 @@ public static bool IsEndOfBlockToken(Token token)
433443
default:
434444
return false;
435445
}
446+
}
447+
448+
449+
/// <summary>
/// Classifies a word by looking it up in the special-words trie.
/// </summary>
/// <param name="value">The word to classify.</param>
/// <returns>
/// The <see cref="WordType"/> registered for <paramref name="value"/>,
/// or <see cref="WordType.None"/> when the word is not registered.
/// </returns>
public static WordType GetWordType(string value)
450+
{
451+
// Fall back to None explicitly: a failed lookup must not be reported as a registered word type.
return _specwords.TryGetValue(value, out var wordType)? wordType : WordType.None;
436452
}
437453

438454
[MethodImpl(MethodImplOptions.AggressiveInlining)]
439455
public static bool IsBooleanLiteralString(string value)
440456
{
441-
return _booleans.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled;
457+
return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Boolean;
442458
}
443459

444460
[MethodImpl(MethodImplOptions.AggressiveInlining)]
445461
public static bool IsUndefinedString(string value)
446462
{
447-
return _undefined.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled;
463+
return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Undefined;
448464
}
449465

450466
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -456,13 +472,13 @@ public static bool IsNullString(string value)
456472
[MethodImpl(MethodImplOptions.AggressiveInlining)]
457473
public static bool IsLogicalOperatorString(string content)
458474
{
459-
return _logicalOp.TryGetValue(content, out var nodeIsFilled) && nodeIsFilled;
475+
return _specwords.TryGetValue(content, out var wordType) && wordType == WordType.Logical;
460476
}
461477

462478
[MethodImpl(MethodImplOptions.AggressiveInlining)]
463479
public static bool IsImportDirective(string value)
464480
{
465-
return _preprocImport.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled;
481+
return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Preproc;
466482
}
467483
}
468484
}

src/OneScript.Language/LexicalAnalysis/WordLexerState.cs

Lines changed: 71 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -11,91 +11,79 @@ public class WordLexerState : LexerState
1111
{
1212
public override Lexem ReadNextLexem(SourceCodeIterator iterator)
1313
{
14-
bool isEndOfText = false;
15-
char cs = '\0';
16-
int start = iterator.Position;
17-
int currentLine = iterator.CurrentLine;
18-
int currentColumn = iterator.CurrentColumn;
19-
while (true)
20-
{
21-
if (!isEndOfText)
22-
{
23-
cs = iterator.CurrentSymbol;
24-
}
25-
26-
if (SpecialChars.IsDelimiter(cs) || isEndOfText)
27-
{
28-
var content = iterator.GetContents();
14+
var location = new CodeRange(iterator.CurrentLine, iterator.CurrentColumn);
2915

30-
Lexem lex;
31-
32-
if (LanguageDef.IsLogicalOperatorString(content))
33-
{
34-
lex = new Lexem()
35-
{
36-
Type = LexemType.Operator,
37-
Token = LanguageDef.GetToken(content),
38-
Content = content,
39-
Location = new CodeRange(currentLine, currentColumn)
40-
};
41-
}
42-
else if (LanguageDef.IsBooleanLiteralString(content))
43-
{
44-
lex = new Lexem()
45-
{
46-
Type = LexemType.BooleanLiteral,
47-
Content = content,
48-
Location = new CodeRange(currentLine, currentColumn)
49-
};
50-
}
51-
else if (LanguageDef.IsUndefinedString(content))
52-
{
53-
lex = new Lexem()
54-
{
55-
Type = LexemType.UndefinedLiteral,
56-
Content = content,
57-
Location = new CodeRange(currentLine, currentColumn)
58-
};
59-
60-
}
61-
else if (LanguageDef.IsNullString(content))
62-
{
63-
lex = new Lexem()
64-
{
65-
Type = LexemType.NullLiteral,
66-
Content = content,
67-
Location = new CodeRange(currentLine, currentColumn)
68-
};
69-
70-
}
71-
else
72-
{
73-
lex = new Lexem()
74-
{
75-
Type = LexemType.Identifier,
76-
Content = content,
77-
Token = LanguageDef.GetToken(content),
78-
Location = new CodeRange(currentLine, currentColumn)
79-
};
80-
81-
if (LanguageDef.IsBuiltInFunction(lex.Token))
82-
{
83-
iterator.SkipSpaces();
84-
if (iterator.CurrentSymbol != '(')
85-
{
86-
lex.Token = Token.NotAToken;
87-
}
88-
}
89-
}
90-
91-
return lex;
92-
}
93-
94-
if (!iterator.MoveNext())
95-
{
96-
isEndOfText = true;
97-
}
16+
do
17+
{
18+
if (SpecialChars.IsDelimiter(iterator.CurrentSymbol))
19+
break;
9820
}
21+
while (iterator.MoveNext());
22+
23+
var content = iterator.GetContents();
24+
Lexem lex;
25+
26+
switch (LanguageDef.GetWordType(content))
27+
{
28+
case LanguageDef.WordType.Logical:
29+
lex = new Lexem()
30+
{
31+
Type = LexemType.Operator,
32+
Token = LanguageDef.GetToken(content),
33+
Content = content,
34+
Location = location
35+
};
36+
break;
37+
38+
case LanguageDef.WordType.Boolean:
39+
lex = new Lexem()
40+
{
41+
Type = LexemType.BooleanLiteral,
42+
Content = content,
43+
Location = location
44+
};
45+
break;
46+
47+
case LanguageDef.WordType.Undefined:
48+
lex = new Lexem()
49+
{
50+
Type = LexemType.UndefinedLiteral,
51+
Content = content,
52+
Location = location
53+
};
54+
break;
55+
56+
case LanguageDef.WordType.Null:
57+
lex = new Lexem()
58+
{
59+
Type = LexemType.NullLiteral,
60+
Content = content,
61+
Location = location
62+
};
63+
break;
64+
65+
default:
66+
var tok = LanguageDef.GetToken(content);
67+
if (LanguageDef.IsBuiltInFunction(tok))
68+
{
69+
iterator.SkipSpaces();
70+
if (iterator.CurrentSymbol != '(')
71+
{
72+
tok = Token.NotAToken;
73+
}
74+
}
75+
76+
lex = new Lexem()
77+
{
78+
Type = LexemType.Identifier,
79+
Content = content,
80+
Token = tok,
81+
Location = location
82+
};
83+
break;
84+
}
85+
86+
return lex;
9987
}
10088
}
10189
}

0 commit comments

Comments
 (0)