@@ -97,7 +97,8 @@ public Token(byte type, int startIndex, int endIndex) {
97
97
* matches the number of Strings that should be concatenated at runtime - it contains
98
98
* {@code null} in positions that should be generated by an expression.
99
99
*/
100
- static String [] parse (ArrayList <SSTNode > expressions , ArrayList <String > formatStringExprsSources , ParserErrorCallback errorCallback , String text , PythonSSTNodeFactory nodeFactory ,
100
+ static String [] parse (ArrayList <SSTNode > expressions , ArrayList <String > formatStringExprsSources , ParserErrorCallback errorCallback , String text , boolean isRawString ,
101
+ PythonSSTNodeFactory nodeFactory ,
101
102
FStringExprParser exprParser ) {
102
103
// fast and imprecise estimate of the capacity for the tokens array
103
104
int estimatedTokensCount = 1 ;
@@ -114,7 +115,7 @@ static String[] parse(ArrayList<SSTNode> expressions, ArrayList<String> formatSt
114
115
115
116
// create tokens
116
117
ArrayList <Token > tokens = new ArrayList <>(estimatedTokensCount );
117
- createTokens (tokens , errorCallback , 0 , text , 0 );
118
+ createTokens (tokens , errorCallback , 0 , text , isRawString , 0 );
118
119
119
120
int topLevelTokensCount = 0 ;
120
121
int expressionsCount = 0 ;
@@ -187,7 +188,6 @@ public static ArrayList<String> createExpressionSources(String text, ArrayList<T
187
188
} else {
188
189
// the expression has token[TOKEN_FMT_TOKENS_COUNT] specifiers parts
189
190
// obtains expressions in the format specifier
190
- // Note: no further nesting is allowed and would have been caught during parsing
191
191
int indexPlusOne = index + 1 ;
192
192
ArrayList <String > specifierExpressions = createExpressionSources (text , tokens , indexPlusOne , indexPlusOne + fmtTokensCount , fmtTokensCount );
193
193
expression .append (",(" );
@@ -212,6 +212,8 @@ public static ArrayList<String> createExpressionSources(String text, ArrayList<T
212
212
// add the expression source
213
213
expression .append (specifierExpressions .get (expressionIndex ));
214
214
expressionIndex ++;
215
+ // skip the nested format specifiers
216
+ sindex += stoken .formatTokensCount ;
215
217
}
216
218
}
217
219
index --;
@@ -240,12 +242,14 @@ public static ArrayList<String> createExpressionSources(String text, ArrayList<T
240
242
* @param errorCallback it's needed for raising syntax errors
241
243
* @param startIndex start parsing from this index
242
244
* @param text text to be parsed
245
+ * @param isRawString whether the String is raw, i.e., escape sequences should be interpreted as
246
+ * verbatim text
243
247
* @param recursionLevel recursive calls are used for parsing the formatting string, which may
244
248
* contain other expressions. Depending on the recursive level some rules apply
245
249
* differently.
246
250
* @return the index of the last processed character
247
251
*/
248
- public static int createTokens (ArrayList <Token > tokens , ParserErrorCallback errorCallback , int startIndex , String text , int recursionLevel ) {
252
+ public static int createTokens (ArrayList <Token > tokens , ParserErrorCallback errorCallback , int startIndex , String text , boolean isRawString , int recursionLevel ) {
249
253
int index ;
250
254
int state = STATE_TEXT ;
251
255
int start = 0 ;
@@ -267,13 +271,27 @@ public static int createTokens(ArrayList<Token> tokens, ParserErrorCallback erro
267
271
case STATE_TEXT :
268
272
switch (ch ) {
269
273
case '\\' :
270
- // skip escape sequence \N{...}, it should not be treated as an
271
- // expression inside f-string
272
- if (lookahead (text , index , len , 'N' , '{' )) {
274
+ if (isRawString ) {
275
+ break ;
276
+ }
277
+ if (lookahead (text , index , len , '\\' )) {
278
+ // double "\\" is skipped, note that "\\\N{...}" should still be
279
+ // treated as \N escape sequence
280
+ index ++;
281
+ } else if (lookahead (text , index , len , 'N' , '{' )) {
282
+ // skip escape sequence \N{...}, it should not be treated as an
283
+ // expression inside f-string, but \\N{...} should be left intact
273
284
index += 2 ;
274
285
while (index < len && text .charAt (index ) != '}' ) {
275
286
index ++;
276
287
}
288
+ if (index >= len ) {
289
+ // Missing the closing brace. The escape sequence is malformed,
290
+ // which will be reported by the String escaping code later,
291
+ // here we just end the parsing
292
+ index = len - 1 ;
293
+ break parserLoop ;
294
+ }
277
295
}
278
296
break ;
279
297
case '{' :
@@ -402,6 +420,9 @@ public static int createTokens(ArrayList<Token> tokens, ParserErrorCallback erro
402
420
currentExpression = createExpressionToken (errorCallback , text , start , expressionEndIndex );
403
421
tokens .add (currentExpression );
404
422
if (endChar == '}' ) {
423
+ // "debug" expressions are by default converted using "repr",
424
// but only as long as there is no format specifier
425
+ currentExpression .type = TOKEN_TYPE_EXPRESSION_REPR ;
405
426
// we're done with the expression
406
427
braceLevel --;
407
428
state = STATE_TEXT ;
@@ -477,7 +498,7 @@ public static int createTokens(ArrayList<Token> tokens, ParserErrorCallback erro
477
498
case STATE_AFTER_COLON :
478
499
assert currentExpression != null ;
479
500
int tokensSizeBefore = tokens .size ();
480
- index = createTokens (tokens , errorCallback , index , text , recursionLevel + 1 );
501
+ index = createTokens (tokens , errorCallback , index , text , isRawString , recursionLevel + 1 );
481
502
currentExpression .formatTokensCount = tokens .size () - tokensSizeBefore ;
482
503
if (index >= len || text .charAt (index ) != '}' ) {
483
504
throw raiseInvalidSyntax (errorCallback , ERROR_MESSAGE_EXPECTING_CLOSING_BRACE );
@@ -543,7 +564,11 @@ private static int skipString(ParserErrorCallback errorCallback, String text, in
543
564
}
544
565
if (inString ) {
545
566
while (index < len ) {
546
- if (text .charAt (index ) == startq ) {
567
+ char ch = text .charAt (index );
568
+ if (ch == '\\' ) {
569
+ throw raiseInvalidSyntax (errorCallback , ERROR_MESSAGE_BACKSLASH_IN_EXPRESSION );
570
+ }
571
+ if (ch == startq ) {
547
572
if (triple ) {
548
573
// single quote should be ignored in a triple quoted string
549
574
if (lookahead (text , index , len , startq , startq )) {
0 commit comments