Skip to content

Commit bc2cdda

Browse files
committed
Improve syntax error reporing from f-strings
1 parent db2b0a8 commit bc2cdda

File tree

3 files changed

+63
-10
lines changed

3 files changed

+63
-10
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/nodes/literal/FormatStringTests.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -41,14 +41,15 @@
4141

4242
package com.oracle.graal.python.nodes.literal;
4343

44+
import org.junit.Assert;
45+
import org.junit.Test;
46+
4447
import com.oracle.graal.python.runtime.PythonParser;
4548
import com.oracle.graal.python.test.parser.ParserTestBase;
4649
import com.oracle.truffle.api.Truffle;
4750
import com.oracle.truffle.api.frame.FrameDescriptor;
4851
import com.oracle.truffle.api.frame.VirtualFrame;
4952
import com.oracle.truffle.api.nodes.Node;
50-
import org.junit.Assert;
51-
import org.junit.Test;
5253

5354
public class FormatStringTests extends ParserTestBase {
5455

@@ -207,6 +208,11 @@ public void quotes01() throws Exception {
207208
testFormatString("f'{\"{{}}\"}'", "format((\"{{}}\"))");
208209
}
209210

211+
@Test
212+
public void embeddedColon() throws Exception {
213+
testFormatString("f'{var[:1]}'", "format((var[:1]))");
214+
}
215+
210216
@Test
211217
public void parser01() throws Exception {
212218
testFormatString("f'{name}'", "format((name))");
@@ -352,7 +358,7 @@ private void testFormatString(String text, String expected) throws Exception {
352358

353359
Assert.assertTrue("The source has to be just fstring", parserResult instanceof FormatStringLiteralNode);
354360
FormatStringLiteralNode fsl = (FormatStringLiteralNode) parserResult;
355-
int[][] tokens = FormatStringLiteralNode.createTokens(fsl, fsl.getValues(), true);
361+
int[][] tokens = FormatStringLiteralNode.createTokens(fsl, fsl.getValues());
356362
FormatStringLiteralNode.StringPart[] fslParts = fsl.getValues();
357363
String[] expressions = FormatStringLiteralNode.createExpressionSources(fslParts, tokens, 0, tokens.length);
358364
int expressionsIndex = 0;

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_fstring.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
*graalpython.lib-python.3.test.test_fstring.TestCase.test_backslash_char
66
*graalpython.lib-python.3.test.test_fstring.TestCase.test_call
77
*graalpython.lib-python.3.test.test_fstring.TestCase.test_compile_time_concat_errors
8+
*graalpython.lib-python.3.test.test_fstring.TestCase.test_unterminated_string
9+
*graalpython.lib-python.3.test.test_fstring.TestCase.test_mismatched_parens
10+
*graalpython.lib-python.3.test.test_fstring.TestCase.test_comments
811
*graalpython.lib-python.3.test.test_fstring.TestCase.test_del
912
*graalpython.lib-python.3.test.test_fstring.TestCase.test_dict
1013
*graalpython.lib-python.3.test.test_fstring.TestCase.test_double_braces

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/literal/FormatStringLiteralNode.java

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ public class FormatStringLiteralNode extends LiteralNode {
6262
static final String ERROR_MESSAGE_UNTERMINATED_STRING = "f-string: unterminated string";
6363
static final String ERROR_MESSAGE_INVALID_SYNTAX = "f-string: invalid syntax";
6464
static final String ERROR_MESSAGE_BACKSLASH_IN_EXPRESSION = "f-string expression part cannot include a backslash";
65+
static final String ERROR_MESSAGE_HASH_IN_EXPRESSION = "f-string expression part cannot include '#'";
66+
static final String ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH = "f-string: closing parenthesis '%c' does not match opening parenthesis '%c'";
67+
static final String ERROR_MESSAGE_UNMATCHED_PAR = "f-string: unmatched '%c'";
68+
static final String ERROR_MESSAGE_TOO_MANY_NESTED_PARS = "f-string: too many nested parenthesis";
69+
static final String ERROR_MESSAGE_EXPECTING_CLOSING_BRACE = "f-string: expecting '}'";
6570

6671
private static final String EMPTY_STRING = "";
6772

@@ -145,7 +150,7 @@ private static String getText(StringBuilder result) {
145150

146151
private void parse(VirtualFrame frame) {
147152
// create tokens
148-
tokens = createTokens(this, values, true);
153+
tokens = createTokens(this, values);
149154
// create sources from tokens, that marks expressions
150155
String[] expressionSources = createExpressionSources(values, tokens, 0, tokens.length);
151156
// and create the expressions
@@ -243,6 +248,8 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
243248
private static final int STATE_EXPRESSION = 5; // in {}
244249
private static final int STATE_UNKNOWN = 6;
245250

251+
private static final int MAX_PAR_NESTING = 200;
252+
246253
// protected for testing
247254
/**
248255
* This is the parser of the fstring. As result is a list of tokens, when a token is int array
@@ -256,17 +263,16 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
256263
*
257264
* @param node it's needed for raising syntax errors
258265
* @param values this part of text will be parsed
259-
* @param topLevel if there is called recursion on topLevel = false, then the syntax error is
260-
* raised
261266
* @return a list of tokens
262267
*/
263-
protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[] values, boolean topLevel) {
268+
protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[] values) {
264269
int index;
265270
int state = STATE_TEXT;
266271
int start = 0;
267272

268273
int braceLevel = 0;
269274
int braceLevelInExpression = 0;
275+
char[] bracesInExpression = new char[MAX_PAR_NESTING];
270276
List<int[]> resultParts = new ArrayList<>(values.length);
271277
for (int valueIndex = 0; valueIndex < values.length; valueIndex++) {
272278
StringPart value = values[valueIndex];
@@ -276,6 +282,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
276282
String text = value.text;
277283
int len = text.length();
278284
index = 0;
285+
start = 0;
279286
while (index < len) {
280287
char ch = text.charAt(index);
281288
switch (state) {
@@ -328,8 +335,24 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
328335
case STATE_EXPRESSION:
329336
switch (ch) {
330337
case '{':
331-
338+
case '(':
339+
case '[':
340+
bracesInExpression[braceLevelInExpression] = ch;
332341
braceLevelInExpression++;
342+
if (braceLevelInExpression >= MAX_PAR_NESTING) {
343+
raiseInvalidSyntax(node, ERROR_MESSAGE_TOO_MANY_NESTED_PARS);
344+
}
345+
break;
346+
case ')':
347+
case ']':
348+
if (braceLevelInExpression == 0) {
349+
raiseInvalidSyntax(node, ERROR_MESSAGE_UNMATCHED_PAR, ch);
350+
}
351+
braceLevelInExpression--;
352+
char expected = ch == ')' ? '(' : '[';
353+
if (bracesInExpression[braceLevelInExpression] != expected) {
354+
raiseUnmatchingClosingPar(node, bracesInExpression[braceLevelInExpression], ch);
355+
}
333356
break;
334357
case '}':
335358
if (braceLevelInExpression == 0) {
@@ -341,6 +364,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
341364
start = index + 1;
342365
} else {
343366
braceLevelInExpression--;
367+
if (bracesInExpression[braceLevelInExpression] != '{') {
368+
raiseUnmatchingClosingPar(node, bracesInExpression[braceLevelInExpression], '}');
369+
}
344370
}
345371
break;
346372
case '\'':
@@ -383,6 +409,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
383409
state = STATE_AFTER_EXCLAMATION;
384410
break;
385411
case ':':
412+
if (braceLevelInExpression > 0) {
413+
break;
414+
}
386415
int[] specifierValue;
387416
if (start < index) {
388417
// cases like {3:spec}
@@ -406,7 +435,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
406435
braceLevelInSpecifier--;
407436
if (braceLevelInSpecifier == -1) {
408437
if (start < index) {
409-
int[][] specifierParts = createTokens(node, new StringPart[]{new StringPart(text.substring(start, index), true)}, false);
438+
int[][] specifierParts = createTokens(node, new StringPart[]{new StringPart(text.substring(start, index), true)});
410439
specifierValue[4] = specifierParts.length;
411440
for (int[] part : specifierParts) {
412441
part[1] = valueIndex;
@@ -423,6 +452,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
423452
index++;
424453
}
425454
break;
455+
case '#':
456+
raiseInvalidSyntax(node, ERROR_MESSAGE_HASH_IN_EXPRESSION);
457+
break;
426458
case '\n':
427459
case '\b':
428460
case '\u0007':
@@ -482,6 +514,10 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
482514
createExpressionToken(node, values, valueIndex, start, index - 1);
483515
raiseInvalidSyntax(node, ERROR_MESSAGE_SINGLE_BRACE);
484516
break;
517+
case STATE_EXPRESSION:
518+
// expression is not allowed to span multiple f-strings: f'{3+' f'1}' is not
519+
// the same as f'{3+1}'
520+
raiseInvalidSyntax(node, ERROR_MESSAGE_EXPECTING_CLOSING_BRACE);
485521
}
486522
}
487523
}
@@ -506,10 +542,18 @@ private static int[] createExpressionToken(FormatStringLiteralNode node, StringP
506542
return new int[]{TOKEN_TYPE_EXPRESSION, valueIndex, start, end, 0};
507543
}
508544

545+
private static void raiseUnmatchingClosingPar(FormatStringLiteralNode node, char opening, char closing) {
546+
PythonLanguage.getCore().raiseInvalidSyntax(node, ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH, closing, opening);
547+
}
548+
509549
private static void raiseInvalidSyntax(FormatStringLiteralNode node, String message) {
510550
PythonLanguage.getCore().raiseInvalidSyntax(node, message);
511551
}
512552

553+
private static void raiseInvalidSyntax(FormatStringLiteralNode node, String message, Object... args) {
554+
PythonLanguage.getCore().raiseInvalidSyntax(node, message, args);
555+
}
556+
513557
private static ExpressionNode createExpression(String src, VirtualFrame frame) {
514558
PythonParser parser = PythonLanguage.getCore().getParser();
515559
Source source = Source.newBuilder(PythonLanguage.ID, src, "<fstring>").build();

0 commit comments

Comments
 (0)