diff --git a/src/OneScript.Language/IdentifiersTrie.cs b/src/OneScript.Language/IdentifiersTrie.cs index df4b68a19..5f014ba6f 100644 --- a/src/OneScript.Language/IdentifiersTrie.cs +++ b/src/OneScript.Language/IdentifiersTrie.cs @@ -17,26 +17,20 @@ public class IdentifiersTrie : IDictionary private class TrieNode { - public char charL; - public char charU; - public TrieNode sibl; - public TrieNode next; + internal char charL; + internal char charU; + internal TrieNode sibl; + internal TrieNode next; - private T _value; + internal T value; - public T Value - { - get => _value; - set - { - HasValue = true; - _value = value; - } - } - - public bool HasValue { get; private set; } - - public TrieNode Find(char ch) + internal bool hasValue; + + internal TrieNode() { } + internal TrieNode(char ch) + { charL = char.ToLower(ch); charU = char.ToUpper(ch); } + + internal TrieNode Find(char ch) { var node = sibl; while (node != null) @@ -47,47 +41,46 @@ public TrieNode Find(char ch) } return null; } - - } - + } + public void Add(string str, T val) { var node = _root; + TrieNode key = node; foreach (char ch in str) { - var key = node.Find(ch); - if (key == null) + if (node == null) { - key = new TrieNode + node = new TrieNode(ch); + key.next = node; + key = node; + node = null; + } + else + { + TrieNode last = node; + key = node; + while (key != null && key.charL != ch && key.charU != ch) + { + last = key; + key = key.sibl; + } + if (key == null) { - charL = char.ToLower(ch), - charU = char.ToUpper(ch), - Value = default(T), - sibl = node.sibl - }; - node.sibl = key; - key.next = new TrieNode(); + key = new TrieNode(ch); + last.sibl = key; + } + node = key.next; } - node = key.next; } - node.Value = val; + key.value = val; + key.hasValue = true; } - public bool ContainsKey(string key) - { - var node = _root; - foreach (char ch in key) - { - var keyNode = node.Find(ch); - if (keyNode == null) - { - return false; - } - node = keyNode.next; - } - - return node.next == null && node.HasValue; + public bool ContainsKey(string str) + { + return TryGetValue(str, out _); } public bool Remove(string key) @@ -96,22 +89,10 @@ public bool Remove(string key) } public T Get(string str) - { - var node = _root; - foreach (char ch in str) - { - TrieNode key = node.Find(ch); - if (key == null) - throw new KeyNotFoundException(); - - node = key.next; - } - - if (!node.HasValue) - throw new KeyNotFoundException(); - - return node.Value; - } + { + return TryGetValue(str, out var value) ? value + : throw new KeyNotFoundException(); + } public T this[string index] { @@ -124,27 +105,34 @@ public T this[string index] public bool TryGetValue(string str, out T value) { - var node = _root; + TrieNode key = _root; + var node = key.sibl; foreach (char ch in str) - { - var key = node.Find(ch); - if (key == null) - { - value = default; - return false; - } - + { + while (node != null && node.charL != ch && node.charU != ch) + { + node = node.sibl; + } + if (node == null) + { + value = default; + return false; + } + + key = node; node = key.next; } - if (!node.HasValue) - { - value = default; - return false; + if (key.hasValue) + { + value = key.value; + return true; + } + else + { + value = default; + return false; } - - value = node.Value; - return true; } public IEnumerator> GetEnumerator() diff --git a/src/OneScript.Language/LanguageDef.cs b/src/OneScript.Language/LanguageDef.cs index 1583db1b0..954101361 100644 --- a/src/OneScript.Language/LanguageDef.cs +++ b/src/OneScript.Language/LanguageDef.cs @@ -5,10 +5,10 @@ This Source Code Form is subject to the terms of the at http://mozilla.org/MPL/2.0/. ----------------------------------------------------------*/ +using OneScript.Language.LexicalAnalysis; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; -using OneScript.Language.LexicalAnalysis; namespace OneScript.Language { @@ -21,14 +21,20 @@ public static class LanguageDef private static readonly IdentifiersTrie _stringToToken = new IdentifiersTrie(); - private static readonly IdentifiersTrie _undefined = new IdentifiersTrie(); - private static readonly IdentifiersTrie _booleans = new IdentifiersTrie(); - private static readonly IdentifiersTrie _logicalOp = new IdentifiersTrie(); - - private static readonly IdentifiersTrie _preprocImport = new IdentifiersTrie(); - const int BUILTINS_INDEX = (int)Token.ByValParam; + public enum WordType + { + Undefined, + Boolean, + Logical, + Null, + Preproc, + None + }; + + private static readonly IdentifiersTrie _specwords = new IdentifiersTrie(); + static LanguageDef() { _priority.Add(Token.Plus, 5); @@ -52,21 +58,26 @@ static LanguageDef() #region constants - _undefined.Add("Undefined", true); - _undefined.Add("Неопределено", true); + _specwords.Add("Undefined", WordType.Undefined); + _specwords.Add("Неопределено", WordType.Undefined); - _booleans.Add("True", true); - _booleans.Add("False", true); - _booleans.Add("Истина", true); - _booleans.Add("Ложь", true); + _specwords.Add("True", WordType.Boolean); + _specwords.Add("False", WordType.Boolean); + _specwords.Add("Истина", WordType.Boolean); + _specwords.Add("Ложь", WordType.Boolean); - _logicalOp.Add("And", true); - _logicalOp.Add("Or", true); - _logicalOp.Add("Not", true); + _specwords.Add("And", WordType.Logical); + _specwords.Add("Or", WordType.Logical); + _specwords.Add("Not", WordType.Logical); - _logicalOp.Add("И", true); - _logicalOp.Add("ИЛИ", true); - _logicalOp.Add("НЕ", true); + _specwords.Add("И", WordType.Logical); + _specwords.Add("ИЛИ", WordType.Logical); + _specwords.Add("НЕ", WordType.Logical); + + _specwords.Add("NULL", WordType.Null); + + _specwords.Add("Использовать", WordType.Preproc); + _specwords.Add("Use", WordType.Preproc); #endregion @@ -216,8 +227,6 @@ static LanguageDef() #endregion - _preprocImport.Add("Использовать", true); - _preprocImport.Add("Use", true); } private static void AddToken(Token token, string name) @@ -247,6 +256,7 @@ public static string GetTokenName(Token token) return Enum.GetName(typeof(Token), token); } + public static string GetTokenAlias(Token token) { if (_keywords.TryGetValue(token,out var strings)) @@ -257,11 +267,9 @@ public static string GetTokenAlias(Token token) return Enum.GetName(typeof(Token), token); } - public static Token GetToken(string tokText) { - Token result; - if (_stringToToken.TryGetValue(tokText, out result)) + if (_stringToToken.TryGetValue(tokText, out Token result)) { return result; } @@ -284,22 +292,28 @@ public static bool IsBuiltInFunction(Token token) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsBinaryOperator(Token token) - { - return token == Token.Plus - || token == Token.Minus - || token == Token.Multiply - || token == Token.Division - || token == Token.Modulo - || token == Token.And - || token == Token.Or - || token == Token.LessThan - || token == Token.LessOrEqual - || token == Token.MoreThan - || token == Token.MoreOrEqual - || token == Token.Equal - || token == Token.NotEqual; - } - + { + switch (token) + { + case Token.Plus: + case Token.Minus: + case Token.Multiply: + case Token.Division: + case Token.Modulo: + case Token.Equal: + case Token.LessThan: + case Token.LessOrEqual: + case Token.MoreThan: + case Token.MoreOrEqual: + case Token.NotEqual: + case Token.And: + case Token.Or: + return true; + default: + return false; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsLogicalBinaryOperator(Token token) { @@ -315,24 +329,42 @@ public static bool IsUnaryOperator(Token token) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsLiteral(in Lexem lex) { - return lex.Type == LexemType.StringLiteral - || lex.Type == LexemType.NumberLiteral - || lex.Type == LexemType.BooleanLiteral - || lex.Type == LexemType.DateLiteral - || lex.Type == LexemType.UndefinedLiteral - || lex.Type == LexemType.NullLiteral; + switch (lex.Type) + { + case LexemType.StringLiteral: + case LexemType.NumberLiteral: + case LexemType.BooleanLiteral: + case LexemType.DateLiteral: + case LexemType.UndefinedLiteral: + case LexemType.NullLiteral: + return true; + default: + return false; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsValidPropertyName(in Lexem lex) { - return lex.Type == LexemType.Identifier - || lex.Type == LexemType.BooleanLiteral - || lex.Type == LexemType.NullLiteral - || lex.Type == LexemType.UndefinedLiteral - || lex.Token == Token.And - || lex.Token == Token.Or - || lex.Token == Token.Not; + switch (lex.Type) + { + case LexemType.Identifier: + case LexemType.BooleanLiteral: + case LexemType.NullLiteral: + case LexemType.UndefinedLiteral: + return true; + + default: + switch (lex.Token) + { + case Token.And: + case Token.Or: + case Token.Not: + return true; + default: + return false; + } + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -394,29 +426,40 @@ public static bool IsBeginOfStatement(Token token) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsEndOfBlockToken(Token token) + { + switch (token) + { + case Token.EndIf: + case Token.EndProcedure: + case Token.EndFunction: + case Token.Else: + case Token.EndLoop: + case Token.EndTry: + case Token.EndOfText: + case Token.ElseIf: + case Token.Exception: + return true; + default: + return false; + } + } + + + public static WordType GetWordType(string value) { - return token == Token.EndIf - || token == Token.EndProcedure - || token == Token.EndFunction - || token == Token.Else - || token == Token.EndLoop - || token == Token.EndTry - || token == Token.EndOfText - || token == Token.ElseIf - || token == Token.Exception - ; + return _specwords.TryGetValue(value, out var wordType)? wordType : WordType.None; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsBooleanLiteralString(string value) { - return _booleans.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled; + return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Boolean; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsUndefinedString(string value) { - return _undefined.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled; + return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Undefined; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -428,13 +471,13 @@ public static bool IsNullString(string value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsLogicalOperatorString(string content) { - return _logicalOp.TryGetValue(content, out var nodeIsFilled) && nodeIsFilled; + return _specwords.TryGetValue(content, out var wordType) && wordType == WordType.Logical; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsImportDirective(string value) { - return _preprocImport.TryGetValue(value, out var nodeIsFilled) && nodeIsFilled; + return _specwords.TryGetValue(value, out var wordType) && wordType == WordType.Preproc; } } } diff --git a/src/OneScript.Language/LexicalAnalysis/SourceCodeIterator.cs b/src/OneScript.Language/LexicalAnalysis/SourceCodeIterator.cs index d82d9e2c9..720ebf271 100644 --- a/src/OneScript.Language/LexicalAnalysis/SourceCodeIterator.cs +++ b/src/OneScript.Language/LexicalAnalysis/SourceCodeIterator.cs @@ -18,6 +18,7 @@ public class SourceCodeIterator : ISourceCodeIndexer private int _lineCounter; private int _index; private int _startPosition; + private int _codeLength; private List _lineBounds; private bool _onNewLine; @@ -44,6 +45,7 @@ internal SourceCodeIterator() private void InitOnString(string code) { _code = code; + _codeLength = code.Length; int cap = code.Length < 512 ? 32 : 512; _lineBounds = new List(cap); _index = OUT_OF_TEXT; @@ -68,18 +70,8 @@ private void InitOnString(string code) public int CurrentLine => _lineCounter; public int CurrentColumn - { - get - { - if (_startPosition == OUT_OF_TEXT) - { - return OUT_OF_TEXT; - } - - int start = GetLineBound(CurrentLine); - return _index - start + 1; - } - } + => _startPosition == OUT_OF_TEXT ? OUT_OF_TEXT : _index - _lineBounds[^1] + 1; + // CurrentLine's start is last in _lineBounds public char CurrentSymbol => _currentSymbol; @@ -87,14 +79,13 @@ public int CurrentColumn public bool MoveNext() { _index++; - if (_index < _code.Length) + if (_index < _codeLength) { _currentSymbol = _code[_index]; if (_currentSymbol == '\n') { _lineCounter++; - if (_index < _code.Length) - _lineBounds.Add(_index + 1); + _lineBounds.Add(_index + 1); } return true; @@ -109,7 +100,7 @@ public bool MoveNext() public char PeekNext() { char result = '\0'; - if (_index + 1 < _code.Length) + if (_index + 1 < _codeLength) { result = _code[_index + 1]; } @@ -155,14 +146,32 @@ public bool SkipSpaces() } } - if (_index >= _code.Length) + if (_index >= _codeLength) { return false; } return true; + } + + public char ReadNextChar() + { + while (Char.IsWhiteSpace(_currentSymbol)) + { + if (_currentSymbol == '\n') + { + _onNewLine = true; + } + if (!MoveNext()) + { + break; + } + } + + return _currentSymbol; } + public string ReadToLineEnd() { while (_currentSymbol != '\n' && MoveNext()) @@ -180,6 +189,9 @@ public string ReadToLineEnd() public string GetCodeLine(int lineNumber) { int start = GetLineBound(lineNumber); + if (start >= _code.Length) + return String.Empty; + int end = _code.IndexOf('\n', start); if (end >= 0) { @@ -200,7 +212,7 @@ public ReadOnlyMemory GetContentSpan() { int len; - if (_startPosition == _index && _startPosition < _code.Length) + if (_startPosition == _index && _startPosition < _codeLength) { len = 1; } diff --git a/src/OneScript.Language/LexicalAnalysis/StringLexerState.cs b/src/OneScript.Language/LexicalAnalysis/StringLexerState.cs index 3afe4c4df..b9163315d 100644 --- a/src/OneScript.Language/LexicalAnalysis/StringLexerState.cs +++ b/src/OneScript.Language/LexicalAnalysis/StringLexerState.cs @@ -1,97 +1,96 @@ -/*---------------------------------------------------------- -This Source Code Form is subject to the terms of the -Mozilla Public License, v.2.0. If a copy of the MPL -was not distributed with this file, You can obtain one -at http://mozilla.org/MPL/2.0/. -----------------------------------------------------------*/ - -using System.Text; - -namespace OneScript.Language.LexicalAnalysis -{ - public class StringLexerState : LexerState - { - private void SkipSpacesAndComments(SourceCodeIterator iterator) - { - while (true) - { /* Пропускаем все пробелы и комментарии */ - iterator.SkipSpaces(); - - if (iterator.CurrentSymbol == '/') - { - if (!iterator.MoveNext()) - throw CreateExceptionOnCurrentLine("Некорректный символ", iterator); - - if (iterator.CurrentSymbol != '/') - throw CreateExceptionOnCurrentLine("Некорректный символ", iterator); - - do - { - if (!iterator.MoveNext()) - break; - - } while (iterator.CurrentSymbol != '\n'); - - } - else - break; - } - } - - public override Lexem ReadNextLexem(SourceCodeIterator iterator) - { +/*---------------------------------------------------------- +This Source Code Form is subject to the terms of the +Mozilla Public License, v.2.0. If a copy of the MPL +was not distributed with this file, You can obtain one +at http://mozilla.org/MPL/2.0/. +----------------------------------------------------------*/ + +using System.Text; + +namespace OneScript.Language.LexicalAnalysis +{ + public class StringLexerState : LexerState + { + private void SkipSpacesAndComments(SourceCodeIterator iterator) + { + while (true) + { /* Пропускаем все пробелы и комментарии */ + if (iterator.ReadNextChar() == '/') + { + if (!iterator.MoveNext()) + throw CreateExceptionOnCurrentLine("Некорректный символ", iterator); + + if (iterator.CurrentSymbol != '/') + throw CreateExceptionOnCurrentLine("Некорректный символ", iterator); + + do + { + if (!iterator.MoveNext()) + break; + + } while (iterator.CurrentSymbol != '\n'); + + } + else + break; + } + } + + public override Lexem ReadNextLexem(SourceCodeIterator iterator) + { StringBuilder contentBuilder = new StringBuilder(); - - while (iterator.MoveNext()) - { - var cs = iterator.CurrentSymbol; - - if (cs == SpecialChars.StringQuote) - { - if (iterator.MoveNext()) - { - if (iterator.CurrentSymbol == SpecialChars.StringQuote) - { - /* Двойная кавычка */ - contentBuilder.Append("\""); - continue; - } - - /* Завершение строки */ - SkipSpacesAndComments(iterator); - - if (iterator.CurrentSymbol == SpecialChars.StringQuote) - { - /* Сразу же началась новая строка */ - contentBuilder.Append('\n'); - continue; - } - } - - var lex = new Lexem - { - Type = LexemType.StringLiteral, - Content = contentBuilder.ToString() - }; - return lex; - } - - if (cs == '\n') - { - iterator.MoveNext(); - SkipSpacesAndComments(iterator); - - if (iterator.CurrentSymbol != '|') - throw CreateExceptionOnCurrentLine("Некорректный строковый литерал!", iterator); - - contentBuilder.Append('\n'); - } - else if(cs != '\r') - contentBuilder.Append(cs); - - } - - throw CreateExceptionOnCurrentLine("Незавершённый строковой интервал!", iterator); - } - } -} + + while (iterator.MoveNext()) + { + var cs = iterator.CurrentSymbol; + + if (cs == SpecialChars.StringQuote) + { + if (iterator.MoveNext()) + { + if (iterator.CurrentSymbol == SpecialChars.StringQuote) + { + /* Двойная кавычка */ + contentBuilder.Append('"'); + + continue; + } + + /* Завершение строки */ + SkipSpacesAndComments(iterator); + + if (iterator.CurrentSymbol == SpecialChars.StringQuote) + { + /* Сразу же началась новая строка */ + contentBuilder.Append('\n'); + + continue; + } + } + + return new Lexem + { + Type = LexemType.StringLiteral, + Content = contentBuilder.ToString() + }; + } + + if (cs == '\n') + { + iterator.MoveNext(); + SkipSpacesAndComments(iterator); + + if (iterator.CurrentSymbol != '|') + throw CreateExceptionOnCurrentLine("Некорректный строковый литерал", iterator); + + contentBuilder.Append('\n'); + } + else if (cs != '\r') + contentBuilder.Append(cs); + + } + + throw CreateExceptionOnCurrentLine("Незавершённый строковый литерал", iterator); + } + } +} diff --git a/src/OneScript.Language/LexicalAnalysis/Token.cs b/src/OneScript.Language/LexicalAnalysis/Token.cs index aca7738f1..681fdf324 100644 --- a/src/OneScript.Language/LexicalAnalysis/Token.cs +++ b/src/OneScript.Language/LexicalAnalysis/Token.cs @@ -43,13 +43,15 @@ public enum Token RemoveHandler, Async, Await, - Goto, - - // operators + Goto, + + // operators + UnaryPlus, + UnaryMinus, + // binary begin + // recommend to be in continuous block Plus, Minus, - UnaryPlus, - UnaryMinus, Multiply, Division, Modulo, @@ -61,6 +63,7 @@ public enum Token NotEqual, And, Or, + // binary end Not, Dot, OpenPar, diff --git a/src/OneScript.Language/LexicalAnalysis/WordLexerState.cs b/src/OneScript.Language/LexicalAnalysis/WordLexerState.cs index c9249ca46..3fa944d63 100644 --- a/src/OneScript.Language/LexicalAnalysis/WordLexerState.cs +++ b/src/OneScript.Language/LexicalAnalysis/WordLexerState.cs @@ -11,91 +11,78 @@ public class WordLexerState : LexerState { public override Lexem ReadNextLexem(SourceCodeIterator iterator) { - bool isEndOfText = false; - char cs = '\0'; - int start = iterator.Position; - int currentLine = iterator.CurrentLine; - int currentColumn = iterator.CurrentColumn; - while (true) - { - if (!isEndOfText) - { - cs = iterator.CurrentSymbol; - } - - if (SpecialChars.IsDelimiter(cs) || isEndOfText) - { - var content = iterator.GetContents(); + var location = new CodeRange(iterator.CurrentLine, iterator.CurrentColumn); - Lexem lex; - - if (LanguageDef.IsLogicalOperatorString(content)) - { - lex = new Lexem() - { - Type = LexemType.Operator, - Token = LanguageDef.GetToken(content), - Content = content, - Location = new CodeRange(currentLine, currentColumn) - }; - } - else if (LanguageDef.IsBooleanLiteralString(content)) - { - lex = new Lexem() - { - Type = LexemType.BooleanLiteral, - Content = content, - Location = new CodeRange(currentLine, currentColumn) - }; - } - else if (LanguageDef.IsUndefinedString(content)) - { - lex = new Lexem() - { - Type = LexemType.UndefinedLiteral, - Content = content, - Location = new CodeRange(currentLine, currentColumn) - }; - - } - else if (LanguageDef.IsNullString(content)) - { - lex = new Lexem() - { - Type = LexemType.NullLiteral, - Content = content, - Location = new CodeRange(currentLine, currentColumn) - }; - - } - else - { - lex = new Lexem() - { - Type = LexemType.Identifier, - Content = content, - Token = LanguageDef.GetToken(content), - Location = new CodeRange(currentLine, currentColumn) - }; - - if (LanguageDef.IsBuiltInFunction(lex.Token)) - { - iterator.SkipSpaces(); - if (iterator.CurrentSymbol != '(') - { - lex.Token = Token.NotAToken; - } - } - } - - return lex; - } - - if (!iterator.MoveNext()) - { - isEndOfText = true; - } + do + { + if (SpecialChars.IsDelimiter(iterator.CurrentSymbol)) + break; } + while (iterator.MoveNext()); + + var content = iterator.GetContents(); + Lexem lex; + + switch (LanguageDef.GetWordType(content)) + { + case LanguageDef.WordType.Logical: + lex = new Lexem() + { + Type = LexemType.Operator, + Token = LanguageDef.GetToken(content), + Content = content, + Location = location + }; + break; + + case LanguageDef.WordType.Boolean: + lex = new Lexem() + { + Type = LexemType.BooleanLiteral, + Content = content, + Location = location + }; + break; + + case LanguageDef.WordType.Undefined: + lex = new Lexem() + { + Type = LexemType.UndefinedLiteral, + Content = content, + Location = location + }; + break; + + case LanguageDef.WordType.Null: + lex = new Lexem() + { + Type = LexemType.NullLiteral, + Content = content, + Location = location + }; + break; + + default: + var tok = LanguageDef.GetToken(content); + if (LanguageDef.IsBuiltInFunction(tok)) + { + if (iterator.ReadNextChar() != '(') + { + tok = Token.NotAToken; + } + } + + lex = new Lexem() + { + Type = LexemType.Identifier, + Content = content, + Token = tok, + Location = location + }; + break; + } + + return lex; } } } diff --git a/src/OneScript.Language/ScriptException.cs b/src/OneScript.Language/ScriptException.cs index 89048f73b..329285557 100644 --- a/src/OneScript.Language/ScriptException.cs +++ b/src/OneScript.Language/ScriptException.cs @@ -98,7 +98,11 @@ public override string Message { var sb = new StringBuilder(MessageWithoutCodeFragment); sb.AppendLine(); - var codeLine = Code?.Replace('\t', ' ').TrimEnd(); + var codeLine = Code?.Replace('\t', ' ')?.TrimEnd() ?? String.Empty; + if (ColumnNumber > codeLine.Length) + { + ColumnNumber = codeLine.Length; + } if (ColumnNumber != ErrorPositionInfo.OUT_OF_TEXT) {