From a1784dcf16e63b7f5f369cf9c17cbebad6d33f11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20K=C3=BChner?= Date: Mon, 1 Nov 2021 19:55:30 +0100 Subject: [PATCH 1/4] bigint & numeric separator support --- src/Esprima/Ast/BigIntLiteral.cs | 16 +++++ src/Esprima/Ast/Literal.cs | 4 +- src/Esprima/Character.cs | 2 +- src/Esprima/JavascriptParser.cs | 23 +++++++ src/Esprima/Messages.cs | 4 +- src/Esprima/Scanner.cs | 105 +++++++++++++++---------------- src/Esprima/Token.cs | 8 ++- 7 files changed, 101 insertions(+), 61 deletions(-) create mode 100644 src/Esprima/Ast/BigIntLiteral.cs diff --git a/src/Esprima/Ast/BigIntLiteral.cs b/src/Esprima/Ast/BigIntLiteral.cs new file mode 100644 index 00000000..2007bcee --- /dev/null +++ b/src/Esprima/Ast/BigIntLiteral.cs @@ -0,0 +1,16 @@ +using System.Numerics; + +namespace Esprima.Ast +{ + public sealed class BigIntLiteral : Literal + { + public readonly string BigInt; + + public BigInteger? BigIntValue => (BigInteger?) Value; + + public BigIntLiteral(BigInteger value, string raw) : base(TokenType.BigIntLiteral, value, raw) + { + BigInt = raw; + } + } +} diff --git a/src/Esprima/Ast/Literal.cs b/src/Esprima/Ast/Literal.cs index 94d8cfb5..c1944027 100644 --- a/src/Esprima/Ast/Literal.cs +++ b/src/Esprima/Ast/Literal.cs @@ -1,14 +1,16 @@ +using System.Numerics; using System.Text.RegularExpressions; using Esprima.Utils; namespace Esprima.Ast { - public sealed class Literal : Expression + public class Literal : Expression { public string? StringValue => TokenType == TokenType.StringLiteral ? Value as string : null; public readonly double NumericValue; public bool BooleanValue => TokenType == TokenType.BooleanLiteral && NumericValue != 0; public Regex? RegexValue => TokenType == TokenType.RegularExpression ? (Regex?) Value : null; + public BigInteger? BigIntValue => TokenType == TokenType.BigIntLiteral ? (BigInteger?) Value : null; public readonly RegexValue? Regex; public readonly object? 
Value; diff --git a/src/Esprima/Character.cs b/src/Esprima/Character.cs index 562782e2..b8cc79c7 100644 --- a/src/Esprima/Character.cs +++ b/src/Esprima/Character.cs @@ -100,7 +100,7 @@ public static bool IsHexDigit(char cp) return cp >= '0' && cp <= '9' || cp >= 'A' && cp <= 'F' || cp >= 'a' && cp <= 'f'; - } + } public static bool IsOctalDigit(char cp) { diff --git a/src/Esprima/JavascriptParser.cs b/src/Esprima/JavascriptParser.cs index fd2b8869..bb3cda2e 100644 --- a/src/Esprima/JavascriptParser.cs +++ b/src/Esprima/JavascriptParser.cs @@ -589,6 +589,19 @@ private Expression ParsePrimaryExpression() token = NextToken(); raw = GetTokenRaw(token); expr = Finalize(node, new Literal(token.NumericValue, raw)); + break; + + case TokenType.BigIntLiteral: + if (_context.Strict && _lookahead.Octal) + { + TolerateUnexpectedToken(_lookahead, Messages.StrictOctalLiteral); + } + + _context.IsAssignmentTarget = false; + _context.IsBindingElement = false; + token = NextToken(); + raw = GetTokenRaw(token); + expr = Finalize(node, new BigIntLiteral(token.BigIntValue.Value, raw)); break; case TokenType.BooleanLiteral: @@ -859,6 +872,16 @@ private Expression ParseObjectPropertyKey() raw = GetTokenRaw(token); key = Finalize(node, new Literal(token.NumericValue, raw)); + break; + + case TokenType.BigIntLiteral: + if (_context.Strict && token.Octal) + { + TolerateUnexpectedToken(token, Messages.StrictOctalLiteral); + } + + raw = GetTokenRaw(token); + key = Finalize(node, new BigIntLiteral(token.BigIntValue.Value, raw)); break; case TokenType.Identifier: diff --git a/src/Esprima/Messages.cs b/src/Esprima/Messages.cs index 71d06005..dcd8ee9e 100644 --- a/src/Esprima/Messages.cs +++ b/src/Esprima/Messages.cs @@ -40,7 +40,9 @@ public static class Messages public const string MultipleDefaultsInSwitch = "More than one default clause in switch statement"; public const string NewlineAfterThrow = "Illegal newline after throw"; public const string NoAsAfterImportNamespace = "Unexpected 
token"; - public const string NoCatchOrFinally = "Missing catch or finally after try"; + public const string NoCatchOrFinally = "Missing catch or finally after try"; + public const string NumericSeperatorOneUnderscore = "Numeric separator must be exactly one underscore"; + public const string NumericSeperatorNotAllowedHere = "Numeric separator is not allowed here"; public const string ParameterAfterRestParameter = "Rest parameter must be last formal parameter"; public const string PropertyAfterRestProperty = "Unexpected token"; public const string Redeclaration = "{0} \"{1}\" has already been declared"; diff --git a/src/Esprima/Scanner.cs b/src/Esprima/Scanner.cs index 0bf16811..2b672de5 100644 --- a/src/Esprima/Scanner.cs +++ b/src/Esprima/Scanner.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Globalization; +using System.Numerics; using System.Runtime.CompilerServices; using System.Text; using System.Text.RegularExpressions; @@ -888,19 +889,7 @@ static string SafeSubstring(string s, int startIndex, int length) public Token ScanHexLiteral(int start) { - var index = Index; - - while (!Eof()) - { - if (!Character.IsHexDigit(Source.CharCodeAt(Index))) - { - break; - } - - Index++; - } - - var number = Source.Substring(index, Index - index); + var number = this.ScanLiteralPart(Character.IsHexDigit); if (number.Length == 0) { @@ -957,23 +946,10 @@ public Token ScanHexLiteral(int start) } public Token ScanBinaryLiteral(int start) - { + { char ch; - var index = Index; - - while (!Eof()) - { - ch = Source[Index]; - if (ch != '0' && ch != '1') - { - break; - } - - Index++; - } - - var number = Source.Substring(index, Index - index); - + var number = this.ScanLiteralPart(c => c == '0' || c == '1'); + if (number.Length == 0) { // only 0b or 0B @@ -1015,18 +991,9 @@ public Token ScanOctalLiteral(char prefix, int start) else { ++Index; - } - - while (!Eof()) - { - if (!Character.IsOctalDigit(Source.CharCodeAt(Index))) - { - break; - } - - 
sb.Append(Source[Index++]); - } - + } + + sb.Append(this.ScanLiteralPart(Character.IsOctalDigit)); var number = sb.ToString(); if (!octal && number.Length == 0) @@ -1084,6 +1051,27 @@ public bool IsImplicitOctalLiteral() return true; } + private string ScanLiteralPart(Func check) + { + string num = ""; + + if (Source.CharCodeAt(Index) == '_') + ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); + + while (!Eof() && (check(Source.CharCodeAt(Index)) || Source.CharCodeAt(Index) == '_')) { + if (Source.CharCodeAt(Index) != '_') + num += Source.CharCodeAt(Index); + Index++; + if (Source.CharCodeAt(Index-1) == '_' && Source.CharCodeAt(Index) == '_') + ThrowUnexpectedToken(Messages.NumericSeperatorOneUnderscore); + } + + if (Source.CharCodeAt(Index-1) == '_') + ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); + + return num; + } + public Token ScanNumericLiteral() { var sb = GetStringBuilder(); @@ -1095,7 +1083,6 @@ public Token ScanNumericLiteral() if (ch != '.') { var first = Source[Index++]; - sb.Append(first); ch = Source.CharCodeAt(Index); // Hex number starts with '0x'. 
@@ -1130,21 +1117,15 @@ public Token ScanNumericLiteral() } } - while (Character.IsDecimalDigit(Source.CharCodeAt(Index))) - { - sb.Append(Source[Index++]); - } - + --Index; + sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); ch = Source.CharCodeAt(Index); } if (ch == '.') { - sb.Append(Source[Index++]); - while (Character.IsDecimalDigit(Source.CharCodeAt(Index))) - { - sb.Append(Source[Index++]); - } + sb.Append(Source[Index++]); + sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); ch = Source.CharCodeAt(Index); } @@ -1161,15 +1142,27 @@ public Token ScanNumericLiteral() if (Character.IsDecimalDigit(Source.CharCodeAt(Index))) { - while (Character.IsDecimalDigit(Source.CharCodeAt(Index))) - { - sb.Append(Source[Index++]); - } + sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); } else { ThrowUnexpectedToken(); } + } + else if (ch == 'n') + { + Index++; + var bigInt = BigInteger.Parse(sb.ToString()); + return new Token + { + Type = TokenType.BigIntLiteral, + Value = bigInt, + BigIntValue = bigInt, + LineNumber = LineNumber, + LineStart = LineStart, + Start = start, + End = Index + }; } if (Character.IsIdentifierStart(Source.CharCodeAt(Index))) diff --git a/src/Esprima/Token.cs b/src/Esprima/Token.cs index 4ddfd6d3..49048321 100644 --- a/src/Esprima/Token.cs +++ b/src/Esprima/Token.cs @@ -1,4 +1,5 @@ -using Esprima.Ast; +using System.Numerics; +using Esprima.Ast; namespace Esprima { @@ -13,7 +14,8 @@ public enum TokenType Punctuator, StringLiteral, RegularExpression, - Template + Template, + BigIntLiteral }; public class Token @@ -41,6 +43,7 @@ public class Token public double NumericValue; public object? Value; public RegexValue? RegexValue; + public BigInteger? 
BigIntValue; public void Clear() { @@ -59,6 +62,7 @@ public void Clear() NumericValue = 0; Value = null; RegexValue = null; + BigIntValue = null; } } } From 0a5726713f58b56dbb8f59d0f808177779f30103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20K=C3=BChner?= Date: Tue, 2 Nov 2021 17:34:24 +0100 Subject: [PATCH 2/4] performance optimizations and a test --- src/Esprima/Scanner.cs | 45 ++++++++++++++++------------ test/Esprima.Tests/Fixtures.cs | 34 +++++++++++++++++++++ test/Esprima.Tests/SeparatorTests.cs | 27 +++++++++++++++++ 3 files changed, 87 insertions(+), 19 deletions(-) create mode 100644 test/Esprima.Tests/SeparatorTests.cs diff --git a/src/Esprima/Scanner.cs b/src/Esprima/Scanner.cs index 2b672de5..f0a02989 100644 --- a/src/Esprima/Scanner.cs +++ b/src/Esprima/Scanner.cs @@ -888,8 +888,10 @@ static string SafeSubstring(string s, int startIndex, int length) // https://tc39.github.io/ecma262/#sec-literals-numeric-literals public Token ScanHexLiteral(int start) - { - var number = this.ScanLiteralPart(Character.IsHexDigit); + { + var sb = GetStringBuilder(); + this.ScanLiteralPart(sb, Character.IsHexDigit); + var number = sb.ToString(); if (number.Length == 0) { @@ -948,8 +950,10 @@ public Token ScanHexLiteral(int start) public Token ScanBinaryLiteral(int start) { char ch; - var number = this.ScanLiteralPart(c => c == '0' || c == '1'); - + var sb = GetStringBuilder(); + this.ScanLiteralPart(sb, c => c == '0' || c == '1'); + var number = sb.ToString(); + if (number.Length == 0) { // only 0b or 0B @@ -993,7 +997,7 @@ public Token ScanOctalLiteral(char prefix, int start) ++Index; } - sb.Append(this.ScanLiteralPart(Character.IsOctalDigit)); + this.ScanLiteralPart(sb, Character.IsOctalDigit); var number = sb.ToString(); if (!octal && number.Length == 0) @@ -1051,25 +1055,28 @@ public bool IsImplicitOctalLiteral() return true; } - private string ScanLiteralPart(Func check) + private void ScanLiteralPart(StringBuilder sb, Func check) { - string num = ""; - - if 
(Source.CharCodeAt(Index) == '_') + var charCode = Source.CharCodeAt(Index); + if (charCode == '_') ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); - while (!Eof() && (check(Source.CharCodeAt(Index)) || Source.CharCodeAt(Index) == '_')) { - if (Source.CharCodeAt(Index) != '_') - num += Source.CharCodeAt(Index); + while ((check(charCode) || charCode == '_')) + { + if (charCode != '_') + sb.Append(charCode); Index++; - if (Source.CharCodeAt(Index-1) == '_' && Source.CharCodeAt(Index) == '_') + var newCharCode = Source.CharCodeAt(Index); + if (charCode == '_' && newCharCode == '_') ThrowUnexpectedToken(Messages.NumericSeperatorOneUnderscore); + + if (Eof()) + break; + charCode = newCharCode; } - if (Source.CharCodeAt(Index-1) == '_') + if (charCode == '_') ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); - - return num; } public Token ScanNumericLiteral() @@ -1118,14 +1125,14 @@ public Token ScanNumericLiteral() } --Index; - sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); + this.ScanLiteralPart(sb, Character.IsDecimalDigit); ch = Source.CharCodeAt(Index); } if (ch == '.') { sb.Append(Source[Index++]); - sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); + this.ScanLiteralPart(sb, Character.IsDecimalDigit); ch = Source.CharCodeAt(Index); } @@ -1142,7 +1149,7 @@ public Token ScanNumericLiteral() if (Character.IsDecimalDigit(Source.CharCodeAt(Index))) { - sb.Append(this.ScanLiteralPart(Character.IsDecimalDigit)); + this.ScanLiteralPart(sb, Character.IsDecimalDigit); } else { diff --git a/test/Esprima.Tests/Fixtures.cs b/test/Esprima.Tests/Fixtures.cs index ca8a329f..c7b5aaf8 100644 --- a/test/Esprima.Tests/Fixtures.cs +++ b/test/Esprima.Tests/Fixtures.cs @@ -12,6 +12,8 @@ namespace Esprima.Test { public class Fixtures { + const bool WriteBackExpectedTree = false; + [Fact] public void HoistingScopeShouldWork() { @@ -32,6 +34,19 @@ private static string ParseAndFormat(SourceType sourceType, string source, Parse 
.WithIncludingRange(true), indent ); + } + + private static bool CompareTreesInternal(string actual, string expected) + { + var actualJObject = JObject.Parse(actual); + var expectedJObject = JObject.Parse(expected); + + // Don't compare the tokens array as it's not in the generated AST + expectedJObject.Remove("tokens"); + expectedJObject.Remove("comments"); + expectedJObject.Remove("errors"); + + return JToken.DeepEquals(actualJObject, expectedJObject); } private static void CompareTrees(string actual, string expected, string path) @@ -57,6 +72,7 @@ private static void CompareTrees(string actual, string expected, string path) [MemberData(nameof(SourceFiles), "Fixtures")] public void ExecuteTestCase(string fixture) { + var options = new ParserOptions { Tokens = true }; string treeFilePath, failureFilePath, moduleFilePath; @@ -106,15 +122,33 @@ public void ExecuteTestCase(string fixture) { sourceType = SourceType.Module; expected = File.ReadAllText(moduleFilePath); + if (WriteBackExpectedTree) + { + var actual = ParseAndFormat(sourceType, script, options); + if (!CompareTreesInternal(actual, expected)) + File.WriteAllText(moduleFilePath, actual); + } } else if (File.Exists(treeFilePath)) { expected = File.ReadAllText(treeFilePath); + if (WriteBackExpectedTree) + { + var actual = ParseAndFormat(sourceType, script, options); + if (!CompareTreesInternal(actual, expected)) + File.WriteAllText(treeFilePath, actual); + } } else if (File.Exists(failureFilePath)) { invalid = true; expected = File.ReadAllText(failureFilePath); + if (WriteBackExpectedTree) + { + var actual = ParseAndFormat(sourceType, script, options); + if (!CompareTreesInternal(actual, expected)) + File.WriteAllText(failureFilePath, actual); + } } else { diff --git a/test/Esprima.Tests/SeparatorTests.cs b/test/Esprima.Tests/SeparatorTests.cs new file mode 100644 index 00000000..fa8084a6 --- /dev/null +++ b/test/Esprima.Tests/SeparatorTests.cs @@ -0,0 +1,27 @@ +using Esprima.Ast; +using Xunit; + +namespace 
Esprima.Tests +{ + public class SeparatorTests + { + [Fact] + public void CanParseSeperators() + { + var script = new JavaScriptParser("var foo=12_3_456").ParseScript(); + Assert.Equal(123456, (double) ((Literal) ((VariableDeclaration) script.Body[0]).Declarations[0].Init).Value); + } + + [Fact] + public void Fails1() + { + Assert.Throws(() => new JavaScriptParser("var foo=12__3_456").ParseScript()); + } + + [Fact] + public void Fails2() + { + Assert.Throws(() => new JavaScriptParser("var foo=12_3_456_").ParseScript()); + } + } +} From 18307e2872f351016a43314d03badfb2ac3b10a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20K=C3=BChner?= Date: Tue, 2 Nov 2021 17:40:20 +0100 Subject: [PATCH 3/4] fix brackets --- src/Esprima/Scanner.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Esprima/Scanner.cs b/src/Esprima/Scanner.cs index f0a02989..5fca05e0 100644 --- a/src/Esprima/Scanner.cs +++ b/src/Esprima/Scanner.cs @@ -1059,24 +1059,34 @@ private void ScanLiteralPart(StringBuilder sb, Func check) { var charCode = Source.CharCodeAt(Index); if (charCode == '_') + { ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); + } while ((check(charCode) || charCode == '_')) { if (charCode != '_') + { sb.Append(charCode); + } Index++; var newCharCode = Source.CharCodeAt(Index); if (charCode == '_' && newCharCode == '_') + { ThrowUnexpectedToken(Messages.NumericSeperatorOneUnderscore); + } if (Eof()) + { break; + } charCode = newCharCode; } if (charCode == '_') + { ThrowUnexpectedToken(Messages.NumericSeperatorNotAllowedHere); + } } public Token ScanNumericLiteral() From 3cf308086c454e3d988aa1ecc0c5ac0a699a6780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Ros?= Date: Tue, 2 Nov 2021 16:52:32 -0700 Subject: [PATCH 4/4] Update Literal.cs --- src/Esprima/Ast/Literal.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Esprima/Ast/Literal.cs b/src/Esprima/Ast/Literal.cs index d6b5485e..ebad89c7 100644 --- 
a/src/Esprima/Ast/Literal.cs +++ b/src/Esprima/Ast/Literal.cs @@ -1,4 +1,5 @@ -using System.Text.RegularExpressions; +using System.Numerics; +using System.Text.RegularExpressions; using Esprima.Utils; namespace Esprima.Ast