@@ -62,6 +62,11 @@ public class FormatStringLiteralNode extends LiteralNode {
62
62
static final String ERROR_MESSAGE_UNTERMINATED_STRING = "f-string: unterminated string" ;
63
63
static final String ERROR_MESSAGE_INVALID_SYNTAX = "f-string: invalid syntax" ;
64
64
static final String ERROR_MESSAGE_BACKSLASH_IN_EXPRESSION = "f-string expression part cannot include a backslash" ;
65
// Raised by the tokenizer when it meets a '#' character while scanning (see the '#' case in createTokens).
+ static final String ERROR_MESSAGE_HASH_IN_EXPRESSION = "f-string expression part cannot include '#'" ;
66
// Two %c placeholders: the offending closing character first, then the opening character it was checked against.
+ static final String ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH = "f-string: closing parenthesis '%c' does not match opening parenthesis '%c'" ;
67
// One %c placeholder: the ')' or ']' that was seen while no bracket was open inside the expression.
+ static final String ERROR_MESSAGE_UNMATCHED_PAR = "f-string: unmatched '%c'" ;
68
// Raised once bracket nesting inside an expression reaches MAX_PAR_NESTING.
+ static final String ERROR_MESSAGE_TOO_MANY_NESTED_PARS = "f-string: too many nested parenthesis" ;
69
// Raised for the STATE_EXPRESSION case, i.e. when an expression would have to span multiple string parts.
+ static final String ERROR_MESSAGE_EXPECTING_CLOSING_BRACE = "f-string: expecting '}'" ;
65
70
66
71
private static final String EMPTY_STRING = "" ;
67
72
@@ -145,7 +150,7 @@ private static String getText(StringBuilder result) {
145
150
146
151
private void parse (VirtualFrame frame ) {
147
152
// create tokens
148
- tokens = createTokens (this , values , true );
153
+ tokens = createTokens (this , values );
149
154
// create sources from tokens, that marks expressions
150
155
String [] expressionSources = createExpressionSources (values , tokens , 0 , tokens .length );
151
156
// and create the expressions
@@ -243,6 +248,8 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
243
248
private static final int STATE_EXPRESSION = 5 ; // in {}
244
249
private static final int STATE_UNKNOWN = 6 ;
245
250
251
// Capacity of the bracesInExpression stack allocated in createTokens; when the nesting depth of
// '{', '(' and '[' inside an expression reaches this value, ERROR_MESSAGE_TOO_MANY_NESTED_PARS is raised.
+ private static final int MAX_PAR_NESTING = 200 ;
252
+
246
253
// protected for testing
247
254
/**
248
255
* This is the parser of the f-string. The result is a list of tokens, where each token is an int array
@@ -256,17 +263,16 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
256
263
*
257
264
* @param node it's needed for raising syntax errors
258
265
* @param values this part of text will be parsed
259
- * @param topLevel if there is called recursion on topLevel = false, then the syntax error is
260
- * raised
261
266
* @return a list of tokens
262
267
*/
263
- protected static int [][] createTokens (FormatStringLiteralNode node , StringPart [] values , boolean topLevel ) {
268
+ protected static int [][] createTokens (FormatStringLiteralNode node , StringPart [] values ) {
264
269
int index ;
265
270
int state = STATE_TEXT ;
266
271
int start = 0 ;
267
272
268
273
int braceLevel = 0 ;
269
274
int braceLevelInExpression = 0 ;
275
+ char [] bracesInExpression = new char [MAX_PAR_NESTING ];
270
276
List <int []> resultParts = new ArrayList <>(values .length );
271
277
for (int valueIndex = 0 ; valueIndex < values .length ; valueIndex ++) {
272
278
StringPart value = values [valueIndex ];
@@ -276,6 +282,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
276
282
String text = value .text ;
277
283
int len = text .length ();
278
284
index = 0 ;
285
+ start = 0 ;
279
286
while (index < len ) {
280
287
char ch = text .charAt (index );
281
288
switch (state ) {
@@ -328,8 +335,24 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
328
335
case STATE_EXPRESSION :
329
336
switch (ch ) {
330
337
case '{' :
331
-
338
+ case '(' :
339
+ case '[' :
340
+ bracesInExpression [braceLevelInExpression ] = ch ;
332
341
braceLevelInExpression ++;
342
+ if (braceLevelInExpression >= MAX_PAR_NESTING ) {
343
+ raiseInvalidSyntax (node , ERROR_MESSAGE_TOO_MANY_NESTED_PARS );
344
+ }
345
+ break ;
346
+ case ')' :
347
+ case ']' :
348
+ if (braceLevelInExpression == 0 ) {
349
+ raiseInvalidSyntax (node , ERROR_MESSAGE_UNMATCHED_PAR , ch );
350
+ }
351
+ braceLevelInExpression --;
352
+ char expected = ch == ')' ? '(' : '[' ;
353
+ if (bracesInExpression [braceLevelInExpression ] != expected ) {
354
+ raiseUnmatchingClosingPar (node , bracesInExpression [braceLevelInExpression ], ch );
355
+ }
333
356
break ;
334
357
case '}' :
335
358
if (braceLevelInExpression == 0 ) {
@@ -341,6 +364,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
341
364
start = index + 1 ;
342
365
} else {
343
366
braceLevelInExpression --;
367
+ if (bracesInExpression [braceLevelInExpression ] != '{' ) {
368
+ raiseUnmatchingClosingPar (node , bracesInExpression [braceLevelInExpression ], '}' );
369
+ }
344
370
}
345
371
break ;
346
372
case '\'' :
@@ -383,6 +409,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
383
409
state = STATE_AFTER_EXCLAMATION ;
384
410
break ;
385
411
case ':' :
412
+ if (braceLevelInExpression > 0 ) {
413
+ break ;
414
+ }
386
415
int [] specifierValue ;
387
416
if (start < index ) {
388
417
// cases like {3:spec}
@@ -406,7 +435,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
406
435
braceLevelInSpecifier --;
407
436
if (braceLevelInSpecifier == -1 ) {
408
437
if (start < index ) {
409
- int [][] specifierParts = createTokens (node , new StringPart []{new StringPart (text .substring (start , index ), true )}, false );
438
+ int [][] specifierParts = createTokens (node , new StringPart []{new StringPart (text .substring (start , index ), true )});
410
439
specifierValue [4 ] = specifierParts .length ;
411
440
for (int [] part : specifierParts ) {
412
441
part [1 ] = valueIndex ;
@@ -423,6 +452,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
423
452
index ++;
424
453
}
425
454
break ;
455
+ case '#' :
456
+ raiseInvalidSyntax (node , ERROR_MESSAGE_HASH_IN_EXPRESSION );
457
+ break ;
426
458
case '\n' :
427
459
case '\b' :
428
460
case '\u0007' :
@@ -482,6 +514,10 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
482
514
createExpressionToken (node , values , valueIndex , start , index - 1 );
483
515
raiseInvalidSyntax (node , ERROR_MESSAGE_SINGLE_BRACE );
484
516
break ;
517
+ case STATE_EXPRESSION :
518
+ // expression is not allowed to span multiple f-strings: f'{3+' f'1}' is not
519
+ // the same as f'{3+1}'
520
+ raiseInvalidSyntax (node , ERROR_MESSAGE_EXPECTING_CLOSING_BRACE );
485
521
}
486
522
}
487
523
}
@@ -506,10 +542,18 @@ private static int[] createExpressionToken(FormatStringLiteralNode node, StringP
506
542
return new int []{TOKEN_TYPE_EXPRESSION , valueIndex , start , end , 0 };
507
543
}
508
544
545
/**
 * Reports a closing bracket that does not pair with the bracket currently on top of the
 * expression bracket stack, using ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH.
 *
 * Note the argument order: the parameters are (opening, closing), but the message template
 * expects the closing character first, so they are forwarded as (closing, opening).
 *
 * NOTE(review): callers continue with normal parsing after this call, so it presumably
 * never returns normally - confirm against the core's raiseInvalidSyntax.
 *
 * @param node passed through to the core's raiseInvalidSyntax
 * @param opening the opening character recorded for the current nesting level
 * @param closing the closing character that was actually encountered
 */
+ private static void raiseUnmatchingClosingPar (FormatStringLiteralNode node , char opening , char closing ) {
546
+ PythonLanguage .getCore ().raiseInvalidSyntax (node , ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH , closing , opening );
547
+ }
548
+
509
549
/**
 * Reports a syntax error with a message that takes no format arguments, by delegating to
 * PythonLanguage.getCore().raiseInvalidSyntax(node, message).
 *
 * @param node passed through to the core's raiseInvalidSyntax
 * @param message the error text, forwarded without format arguments
 */
private static void raiseInvalidSyntax (FormatStringLiteralNode node , String message ) {
510
550
PythonLanguage .getCore ().raiseInvalidSyntax (node , message );
511
551
}
512
552
553
/**
 * Reports a syntax error whose message is a format template, forwarding the template and its
 * arguments to PythonLanguage.getCore().raiseInvalidSyntax(node, message, args).
 *
 * Used in this file with templates containing %c placeholders (e.g. ERROR_MESSAGE_UNMATCHED_PAR).
 *
 * @param node passed through to the core's raiseInvalidSyntax
 * @param message the message template
 * @param args values for the template's placeholders, forwarded unchanged
 */
+ private static void raiseInvalidSyntax (FormatStringLiteralNode node , String message , Object ... args ) {
554
+ PythonLanguage .getCore ().raiseInvalidSyntax (node , message , args );
555
+ }
556
+
513
557
private static ExpressionNode createExpression (String src , VirtualFrame frame ) {
514
558
PythonParser parser = PythonLanguage .getCore ().getParser ();
515
559
Source source = Source .newBuilder (PythonLanguage .ID , src , "<fstring>" ).build ();
0 commit comments