Skip to content

Commit f0a409b

Browse files
committed
Integer literal parsing refactored and fixed handling of underscores
1 parent 657c3c6 commit f0a409b

File tree

2 files changed

+65
-110
lines changed

2 files changed

+65
-110
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_int.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
*graalpython.lib-python.3.test.test_int.IntTestCases.test_non_numeric_input_types
1515
*graalpython.lib-python.3.test.test_int.IntTestCases.test_small_ints
1616
*graalpython.lib-python.3.test.test_int.IntTestCases.test_string_float
17+
*graalpython.lib-python.3.test.test_int.IntTestCases.test_underscores

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinConstructors.java

Lines changed: 64 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,7 @@ public abstract static class IntNode extends PythonTernaryBuiltinNode {
11481148
private final BranchProfile bigIntegerProfile = BranchProfile.create();
11491149
private final BranchProfile primitiveIntProfile = BranchProfile.create();
11501150
private final BranchProfile fullIntProfile = BranchProfile.create();
1151+
private final BranchProfile notSimpleDecimalLiteralProfile = BranchProfile.create();
11511152

11521153
@Child private BytesNodes.ToBytesNode toByteArrayNode;
11531154
@Child private LookupAndCallUnaryNode callIntNode;
@@ -1157,9 +1158,8 @@ public abstract static class IntNode extends PythonTernaryBuiltinNode {
11571158

11581159
@TruffleBoundary
11591160
private static Object stringToIntInternal(String num, int base) {
1160-
String s = num.replace("_", "");
11611161
try {
1162-
BigInteger bi = asciiToBigInteger(s, base, false);
1162+
BigInteger bi = asciiToBigInteger(num, base);
11631163
if (bi.compareTo(BigInteger.valueOf(Integer.MAX_VALUE)) > 0 || bi.compareTo(BigInteger.valueOf(Integer.MIN_VALUE)) < 0) {
11641164
return bi;
11651165
} else {
@@ -1171,6 +1171,13 @@ private static Object stringToIntInternal(String num, int base) {
11711171
}
11721172

11731173
private Object stringToInt(VirtualFrame frame, Object cls, String number, int base, Object origObj) {
1174+
if (base == 0 || base == 10) {
1175+
Object value = parseSimpleDecimalLiteral(number);
1176+
if (value != null) {
1177+
return createInt(cls, value);
1178+
}
1179+
}
1180+
notSimpleDecimalLiteralProfile.enter();
11741181
Object value = stringToIntInternal(number, base);
11751182
if (value == null) {
11761183
invalidValueProfile.enter();
@@ -1221,8 +1228,8 @@ private void checkBase(int base) {
12211228
}
12221229
}
12231230

1224-
// Copied directly from Jython
1225-
private static BigInteger asciiToBigInteger(String str, int possibleBase, boolean isLong) throws NumberFormatException {
1231+
// Adapted from Jython
1232+
private static BigInteger asciiToBigInteger(String str, int possibleBase) throws NumberFormatException {
12261233
CompilerAsserts.neverPartOfCompilation();
12271234
int base = possibleBase;
12281235
int b = 0;
@@ -1236,44 +1243,49 @@ private static BigInteger asciiToBigInteger(String str, int possibleBase, boolea
12361243
e--;
12371244
}
12381245

1246+
boolean acceptUnderscore = false;
1247+
boolean raiseIfNotZero = false;
12391248
char sign = 0;
12401249
if (b < e) {
12411250
sign = str.charAt(b);
12421251
if (sign == '-' || sign == '+') {
12431252
b++;
1244-
while (b < e && Character.isWhitespace(str.charAt(b))) {
1245-
b++;
1246-
}
12471253
}
12481254

12491255
if (base == 16) {
12501256
if (str.charAt(b) == '0') {
12511257
if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') {
12521258
b += 2;
1259+
acceptUnderscore = true;
12531260
}
12541261
}
12551262
} else if (base == 0) {
12561263
if (str.charAt(b) == '0') {
12571264
if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') {
12581265
base = 16;
12591266
b += 2;
1267+
acceptUnderscore = true;
12601268
} else if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') {
12611269
base = 8;
12621270
b += 2;
1271+
acceptUnderscore = true;
12631272
} else if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') {
12641273
base = 2;
12651274
b += 2;
1275+
acceptUnderscore = true;
12661276
} else {
1267-
base = 8;
1277+
raiseIfNotZero = true;
12681278
}
12691279
}
12701280
} else if (base == 8) {
12711281
if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') {
12721282
b += 2;
1283+
acceptUnderscore = true;
12731284
}
12741285
} else if (base == 2) {
12751286
if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') {
12761287
b += 2;
1288+
acceptUnderscore = true;
12771289
}
12781290
}
12791291
}
@@ -1282,80 +1294,76 @@ private static BigInteger asciiToBigInteger(String str, int possibleBase, boolea
12821294
base = 10;
12831295
}
12841296

1285-
// if the base >= 22, then an 'l' or 'L' is a digit!
1286-
if (isLong && base < 22 && e > b && (str.charAt(e - 1) == 'L' || str.charAt(e - 1) == 'l')) {
1287-
e--;
1297+
int i = b;
1298+
while (i < e) {
1299+
if (str.charAt(i) == '_') {
1300+
if (!acceptUnderscore || i == e - 1) {
1301+
throw new NumberFormatException("Illegal underscore in int literal");
1302+
} else {
1303+
acceptUnderscore = false;
1304+
}
1305+
} else {
1306+
acceptUnderscore = true;
1307+
}
1308+
++i;
12881309
}
12891310

12901311
String s = str;
12911312
if (b > 0 || e < str.length()) {
12921313
s = str.substring(b, e);
12931314
}
1315+
s = s.replace("_", "");
12941316

12951317
BigInteger bi;
12961318
if (sign == '-') {
12971319
bi = new BigInteger("-" + s, base);
12981320
} else {
12991321
bi = new BigInteger(s, base);
13001322
}
1323+
1324+
if (raiseIfNotZero && !bi.equals(BigInteger.ZERO)) {
1325+
throw new NumberFormatException("Obsolete octal int literal");
1326+
}
13011327
return bi;
13021328
}
13031329

1304-
@TruffleBoundary
1305-
private static int parseInt(String arg, int base) {
1306-
if (arg.isEmpty() || base == 0) {
1307-
throw new NumberFormatException();
1330+
/**
1331+
* Fast path parser of integer literals. Accepts only a subset of allowed literals - no
1332+
* underscores, no leading zeros, no plus sign, no spaces, only ascii digits and the result
1333+
* must be small enough to fit into long.
1334+
*
1335+
* @param arg the string to parse
1336+
* @return the parsed value as an int or a long, or null if the literal is not simple enough
1337+
*/
1338+
private static Object parseSimpleDecimalLiteral(String arg) {
1339+
if (arg.isEmpty()) {
1340+
return null;
13081341
}
1309-
boolean negative = arg.charAt(0) == '-';
1310-
int start = negative ? 1 : 0;
1311-
if (arg.length() <= start || arg.charAt(start) == '_') {
1312-
throw new NumberFormatException();
1342+
int start = arg.charAt(0) == '-' ? 1 : 0;
1343+
if (arg.length() <= start || arg.length() > 18 + start) {
1344+
return null;
13131345
}
1314-
long value = 0;
1315-
for (int i = start; i < arg.length(); i++) {
1316-
char c = arg.charAt(i);
1317-
if (c == '_') {
1318-
continue;
1319-
}
1320-
if (c < '0' || c > '9') {
1321-
throw new NumberFormatException();
1322-
}
1323-
value = value * base + (c - '0');
1324-
if (value > Integer.MAX_VALUE) {
1325-
throw new NumberFormatException();
1346+
if (arg.charAt(start) == '0') {
1347+
if (arg.length() > start + 1) {
1348+
return null;
13261349
}
1327-
}
1328-
return (int) (negative ? -value : value);
1329-
}
1330-
1331-
private static final long MAX_VALUE = (Long.MAX_VALUE - 10) / 10;
1332-
1333-
@TruffleBoundary
1334-
private static long parseLong(String arg, int base) {
1335-
if (arg.isEmpty() || base == 0) {
1336-
throw new NumberFormatException();
1337-
}
1338-
boolean negative = arg.charAt(0) == '-';
1339-
int start = negative ? 1 : 0;
1340-
if (arg.length() <= start || arg.charAt(start) == '_') {
1341-
throw new NumberFormatException();
1350+
return 0;
13421351
}
13431352
long value = 0;
13441353
for (int i = start; i < arg.length(); i++) {
13451354
char c = arg.charAt(i);
1346-
if (c == '_') {
1347-
continue;
1348-
}
13491355
if (c < '0' || c > '9') {
1350-
throw new NumberFormatException();
1351-
}
1352-
if (value >= MAX_VALUE) {
1353-
// overflow, this will not allow Long.MIN_VALUE to be parsed
1354-
throw new NumberFormatException();
1356+
return null;
13551357
}
1356-
value = value * base + (c - '0');
1358+
value = value * 10 + (c - '0');
13571359
}
1358-
return negative ? -value : value;
1360+
if (start != 0) {
1361+
value = -value;
1362+
}
1363+
if (value >= Integer.MIN_VALUE && value <= Integer.MAX_VALUE) {
1364+
return (int) value;
1365+
}
1366+
return value;
13591367
}
13601368

13611369
@Child private IsBuiltinClassProfile isPrimitiveProfile = IsBuiltinClassProfile.create();
@@ -1404,33 +1412,11 @@ Object createInt(Object cls, double arg, @SuppressWarnings("unused") PNone base,
14041412

14051413
// String
14061414

1407-
@Specialization(guards = {"isNoValue(base)", "isPrimitiveInt(cls)"}, rewriteOn = NumberFormatException.class)
1408-
int createIntBase10(@SuppressWarnings("unused") Object cls, String arg, @SuppressWarnings("unused") PNone base) throws NumberFormatException {
1409-
return parseInt(arg, 10);
1410-
}
1411-
1412-
@Specialization(guards = {"isNoValue(base)", "isPrimitiveInt(cls)"}, rewriteOn = NumberFormatException.class)
1413-
long createLongBase10(@SuppressWarnings("unused") Object cls, String arg, @SuppressWarnings("unused") PNone base) throws NumberFormatException {
1414-
return parseLong(arg, 10);
1415-
}
1416-
14171415
@Specialization(guards = "isNoValue(base)")
14181416
Object createInt(VirtualFrame frame, Object cls, String arg, @SuppressWarnings("unused") PNone base) {
14191417
return stringToInt(frame, cls, arg, 10, arg);
14201418
}
14211419

1422-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1423-
int parseInt(@SuppressWarnings("unused") Object cls, String arg, int base) throws NumberFormatException {
1424-
checkBase(base);
1425-
return parseInt(arg, base);
1426-
}
1427-
1428-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1429-
long parseLong(@SuppressWarnings("unused") Object cls, String arg, int base) throws NumberFormatException {
1430-
checkBase(base);
1431-
return parseLong(arg, base);
1432-
}
1433-
14341420
@Specialization
14351421
Object parsePIntError(VirtualFrame frame, Object cls, String number, int base) {
14361422
checkBase(base);
@@ -1447,16 +1433,6 @@ Object createIntError(VirtualFrame frame, Object cls, String number, Object base
14471433

14481434
// PIBytesLike
14491435

1450-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1451-
int parseInt(VirtualFrame frame, Object cls, PIBytesLike arg, int base) throws NumberFormatException {
1452-
return parseInt(cls, toString(frame, arg), base);
1453-
}
1454-
1455-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1456-
long parseLong(VirtualFrame frame, Object cls, PIBytesLike arg, int base) throws NumberFormatException {
1457-
return parseLong(cls, toString(frame, arg), base);
1458-
}
1459-
14601436
@Specialization
14611437
Object parseBytesError(VirtualFrame frame, Object cls, PIBytesLike arg, int base) {
14621438
checkBase(base);
@@ -1470,33 +1446,11 @@ Object parseBytesError(VirtualFrame frame, Object cls, PIBytesLike arg, @Suppres
14701446

14711447
// PString
14721448

1473-
@Specialization(guards = {"isNoValue(base)", "isPrimitiveInt(cls)"}, rewriteOn = NumberFormatException.class)
1474-
int createInt(@SuppressWarnings("unused") Object cls, PString arg, @SuppressWarnings("unused") PNone base) throws NumberFormatException {
1475-
return parseInt(arg.getValue(), 10);
1476-
}
1477-
1478-
@Specialization(guards = {"isNoValue(base)", "isPrimitiveInt(cls)"}, rewriteOn = NumberFormatException.class)
1479-
long createLong(@SuppressWarnings("unused") Object cls, PString arg, @SuppressWarnings("unused") PNone base) throws NumberFormatException {
1480-
return parseLong(arg.getValue(), 10);
1481-
}
1482-
14831449
@Specialization(guards = "isNoValue(base)")
14841450
Object parsePInt(VirtualFrame frame, Object cls, PString arg, @SuppressWarnings("unused") PNone base) {
14851451
return stringToInt(frame, cls, arg.getValue(), 10, arg);
14861452
}
14871453

1488-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1489-
int parseInt(@SuppressWarnings("unused") Object cls, PString arg, int base) throws NumberFormatException {
1490-
checkBase(base);
1491-
return parseInt(arg.getValue(), base);
1492-
}
1493-
1494-
@Specialization(guards = "isPrimitiveInt(cls)", rewriteOn = NumberFormatException.class)
1495-
long parseLong(@SuppressWarnings("unused") Object cls, PString arg, int base) throws NumberFormatException {
1496-
checkBase(base);
1497-
return parseLong(arg.getValue(), base);
1498-
}
1499-
15001454
@Specialization
15011455
Object parsePInt(VirtualFrame frame, Object cls, PString arg, int base) {
15021456
checkBase(base);

0 commit comments

Comments
 (0)