Skip to content

Commit e16b5fe

Browse files
committed
Backport underscores in integer literals
1 parent 0ad2080 commit e16b5fe

File tree

5 files changed

+247
-325
lines changed

5 files changed

+247
-325
lines changed

Src/IronPython/Compiler/Tokenizer.cs

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -735,11 +735,16 @@ private Token ReadNumber(int start) {
735735
}
736736
isPrefix0 = true;
737737

738-
while (NextChar('0')) { } // skip leading zeroes
738+
// skip leading zeroes
739+
while (true) {
740+
NextChar('_');
741+
if (!NextChar('0')) break;
742+
}
739743
}
740744

741745
bool isFirstChar = true;
742746
while (true) {
747+
NextChar('_');
743748
int ch = NextChar();
744749

745750
switch (ch) {
@@ -755,7 +760,7 @@ private Token ReadNumber(int start) {
755760
MarkTokenEnd();
756761

757762
// TODO: parse in place
758-
return new ConstantValueToken(ParseInteger(GetTokenString(), 10));
763+
return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10));
759764

760765
case 'j':
761766
case 'J':
@@ -784,7 +789,7 @@ private Token ReadNumber(int start) {
784789
}
785790

786791
// TODO: parse in place
787-
return new ConstantValueToken(ParseInteger(GetTokenString(), 10));
792+
return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10));
788793
}
789794
isFirstChar = false;
790795
}
@@ -795,8 +800,9 @@ private Token ReadBinaryNumber() {
795800
int iVal = 0;
796801
bool useBigInt = false;
797802
BigInteger bigInt = BigInteger.Zero;
798-
bool first = true;
803+
bool isFirstChar = true;
799804
while (true) {
805+
NextChar('_');
800806
int ch = NextChar();
801807
switch (ch) {
802808
case '0':
@@ -812,7 +818,7 @@ private Token ReadBinaryNumber() {
812818
bigInt = (BigInteger)iVal;
813819
}
814820

815-
if (bits >= 32) {
821+
if (useBigInt) {
816822
bigInt = (bigInt << 1) | (ch - '0');
817823
} else {
818824
iVal = iVal << 1 | (ch - '0');
@@ -822,22 +828,21 @@ private Token ReadBinaryNumber() {
822828
BufferBack();
823829
MarkTokenEnd();
824830

825-
if (first) {
826-
ReportSyntaxError(
827-
new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1),
828-
BufferTokenEnd),
829-
Resources.InvalidToken, ErrorCodes.SyntaxError);
831+
if (isFirstChar) {
832+
var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1);
833+
ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError);
830834
}
831835

832836
return new ConstantValueToken(useBigInt ? bigInt : (object)iVal);
833837
}
834-
first = false;
838+
isFirstChar = false;
835839
}
836840
}
837841

838842
private Token ReadOctalNumber() {
839-
bool first = true;
843+
bool isFirstChar = true;
840844
while (true) {
845+
NextChar('_');
841846
int ch = NextChar();
842847

843848
switch (ch) {
@@ -855,23 +860,24 @@ private Token ReadOctalNumber() {
855860
BufferBack();
856861
MarkTokenEnd();
857862

858-
if (first) {
859-
ReportSyntaxError(
860-
new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1),
861-
BufferTokenEnd),
862-
Resources.InvalidToken, ErrorCodes.SyntaxError);
863+
if (isFirstChar) {
864+
var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1);
865+
ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError);
863866
}
864867

865868
// TODO: parse in place
866-
return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 8));
869+
var span = GetTokenSpan().Slice(2);
870+
if (!span.IsEmpty && span[0] == '_') span = span.Slice(1);
871+
return new ConstantValueToken(ParseInteger(span, 8));
867872
}
868-
first = false;
873+
isFirstChar = false;
869874
}
870875
}
871876

872877
private Token ReadHexNumber() {
873-
bool first = true;
878+
bool isFirstChar = true;
874879
while (true) {
880+
NextChar('_');
875881
int ch = NextChar();
876882

877883
switch (ch) {
@@ -903,17 +909,17 @@ private Token ReadHexNumber() {
903909
BufferBack();
904910
MarkTokenEnd();
905911

906-
if (first) {
907-
ReportSyntaxError(
908-
new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1),
909-
BufferTokenEnd),
910-
Resources.InvalidToken, ErrorCodes.SyntaxError);
912+
if (isFirstChar) {
913+
var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1);
914+
ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError);
911915
}
912916

913917
// TODO: parse in place
914-
return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 16));
918+
var span = GetTokenSpan().Slice(2);
919+
if (!span.IsEmpty && span[0] == '_') span = span.Slice(1);
920+
return new ConstantValueToken(ParseInteger(span, 16));
915921
}
916-
first = false;
922+
isFirstChar = false;
917923
}
918924
}
919925

@@ -1431,10 +1437,8 @@ private void SetIndent(int spaces, StringBuilder chars) {
14311437
current = DoDedent(spaces, current);
14321438

14331439
if (spaces != current) {
1434-
ReportSyntaxError(
1435-
new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1),
1436-
BufferTokenEnd),
1437-
Resources.IndentationMismatch, ErrorCodes.IndentationError);
1440+
var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1);
1441+
ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.IndentationMismatch, ErrorCodes.IndentationError);
14381442
}
14391443
}
14401444
}
@@ -1448,12 +1452,11 @@ private int DoDedent(int spaces, int current) {
14481452
return current;
14491453
}
14501454

1451-
private object ParseInteger(string s, int radix) {
1452-
try {
1453-
return LiteralParser.ParseInteger(s, radix);
1454-
} catch (ArgumentException e) {
1455-
ReportSyntaxError(BufferTokenSpan, e.Message, ErrorCodes.SyntaxError);
1455+
private object ParseInteger(ReadOnlySpan<char> s, int radix) {
1456+
if (LiteralParser.TryParseIntegerSign(s, radix, out object result)) {
1457+
return result;
14561458
}
1459+
ReportSyntaxError(BufferTokenSpan, "invalid token", ErrorCodes.SyntaxError);
14571460
return ScriptingRuntimeHelpers.Int32ToObject(0);
14581461
}
14591462

@@ -1673,6 +1676,9 @@ private string GetTokenSubstring(int offset, int length) {
16731676
return new String(_buffer, _start + offset, length);
16741677
}
16751678

1679+
private ReadOnlySpan<char> GetTokenSpan()
1680+
=> _buffer.AsSpan(_start, _tokenEnd - _start);
1681+
16761682
[Conditional("DEBUG")]
16771683
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1822:MarkMembersAsStatic")]
16781684
private void CheckInvariants() {

0 commit comments

Comments
 (0)