8 changes: 8 additions & 0 deletions Lib/test/test_fstring.py
@@ -12,10 +12,12 @@
import dis
import os
import re
import tokenize
import types
import decimal
import unittest
import warnings
from io import BytesIO
from test import support
from test.support.os_helper import temp_cwd
from test.support.script_helper import assert_python_failure, assert_python_ok
@@ -1859,6 +1861,12 @@ def __format__(self, format):
# Test multiple format specs in same raw f-string
self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')

def test_gh139516(self):
# gh-139516
# The '\n' is explicit to guarantee there is no trailing whitespace,
# which would invalidate the test.
# tokenize must be used instead of compile so that the source is parsed
# line by line, which is what exposes the bug.
list(tokenize.tokenize(BytesIO('''f"{f(a=lambda: 'à'\n)}"'''.encode()).readline))
Member:
I am confused. Isn't it possible to trigger this in an exec or eval call? Or perhaps a file with an encoding?

Contributor Author:
See below VVV



if __name__ == '__main__':
unittest.main()
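For reference, a minimal standalone sketch of the reproducer above (my own framing, not part of the patch): it feeds the same source through tokenize's line-by-line readline interface, which is the path that exposed the bug.

```python
import tokenize
from io import BytesIO

# Same source as test_gh139516: an f-string whose replacement field contains
# a keyword argument and a lambda, with an explicit '\n' before the ')'.
source = '''f"{f(a=lambda: 'à'\n)}"'''

# tokenize reads the input one line at a time via readline; before the fix,
# the lambda's ':' was mistaken here for the start of a format spec.
for tok in tokenize.tokenize(BytesIO(source.encode()).readline):
    print(tok)
```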
17 changes: 17 additions & 0 deletions Lib/test/test_tokenize.py
@@ -1216,6 +1216,23 @@ def test_multiline_non_ascii_fstring_with_expr(self):
FSTRING_END "\'\'\'" (3, 1) (3, 4)
""")

# gh-139516: the '\n' is explicit to guarantee there is no trailing whitespace, which would invalidate the test
self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\
FSTRING_START \'f"\' (1, 0) (1, 2)
OP '{' (1, 2) (1, 3)
NAME 'f' (1, 3) (1, 4)
OP '(' (1, 4) (1, 5)
NAME 'a' (1, 5) (1, 6)
OP '=' (1, 6) (1, 7)
NAME 'lambda' (1, 7) (1, 13)
OP ':' (1, 13) (1, 14)
STRING "\'à\'" (1, 15) (1, 18)
NL '\\n' (1, 18) (1, 19)
OP ')' (2, 0) (2, 1)
OP '}' (2, 1) (2, 2)
FSTRING_END \'"\' (2, 2) (2, 3)
""")

class GenerateTokensTest(TokenizeTest):
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
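A rough sketch of how one might dump tokens in a layout similar to the table above outside the test suite (`dump_tokens` is a hypothetical helper, not the suite's `check_tokenize`):

```python
import tokenize
from io import BytesIO

def dump_tokens(source):
    # Hypothetical helper: print tokens roughly in the layout used above.
    for tok in tokenize.tokenize(BytesIO(source.encode()).readline):
        if tok.type in (tokenize.ENCODING, tokenize.ENDMARKER):
            continue  # the tables above omit these bookkeeping tokens
        print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

dump_tokens('''f"{f(a=lambda: 'à'\n)}"''')
```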
@@ -0,0 +1 @@
Fix the tokenizer erroneously treating a lambda's colon inside an f-string expression as the start of a format spec.
2 changes: 1 addition & 1 deletion Parser/lexer/lexer.c
@@ -1376,7 +1376,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
}

if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
current_tok->in_debug = 1;
}

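The one-line change narrows when the lexer flags a debug f-string: `=` only means the `{expr=}` debug form when it appears at the top level of the replacement field, not when it is nested inside brackets as a keyword argument or default. A plain-Python illustration of the two cases:

```python
def f(a=None):
    return a

x = 1
print(f"{x=}")      # '=' at the top of the replacement field: debug form, prints x=1
print(f"{f(a=1)}")  # '=' nested inside the call: keyword argument, prints 1
```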
2 changes: 2 additions & 0 deletions Parser/lexer/state.h
@@ -9,6 +9,8 @@

#define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0)
#define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0)
#define INSIDE_FSTRING_EXPR_AT_TOP(tok) \
(tok->curly_bracket_depth - tok->curly_bracket_expr_start_depth == 1)

enum decoding_state {
STATE_INIT,
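As I read the new macro (an interpretation, not stated in the patch), `curly_bracket_depth` counts every bracket opened inside the f-string, while `curly_bracket_expr_start_depth` records the depth at which the replacement field's `{` opened; their difference is exactly 1 only when no nested `(`, `[`, or `{` is currently open. A toy Python model of that bookkeeping (hypothetical names, illustrative only):

```python
def equals_signs_at_top(expr):
    # Toy model of the lexer's depth bookkeeping, not CPython's implementation.
    # depth starts at 1 for the replacement field's own '{';
    # expr_start_depth is the depth just before that '{' opened (0 here).
    expr_start_depth = 0
    depth = 1
    at_top = {}
    for i, ch in enumerate(expr):
        if ch == '=':
            at_top[i] = (depth - expr_start_depth == 1)
        elif ch in '([{':
            depth += 1
        elif ch in ')]}':
            depth -= 1
    return at_top

print(equals_signs_at_top("x="))              # {1: True}  -> debug form
print(equals_signs_at_top("f(a=lambda: 1)"))  # {3: False} -> keyword argument
```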