From 0237c0052b7fb0ed417cf9adf3100f70c744a08d Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Tue, 7 Oct 2025 12:28:15 -0400 Subject: [PATCH] [3.13] gh-139516: Fix lambda colon start format spec in f-string in tokenizer (GH-139657) (cherry picked from commit 539461d9ec8e5322ead638f7be733fd196aa6c79) Co-authored-by: Tomasz Pytel --- Lib/test/test_fstring.py | 7 +++++++ Lib/test/test_tokenize.py | 17 +++++++++++++++++ ...25-10-06-13-15-26.gh-issue-139516.d9Pkur.rst | 1 + Parser/lexer/lexer.c | 2 +- Parser/lexer/state.h | 2 ++ 5 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 5e743d34d2a573..d86977c55c3b90 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1849,6 +1849,13 @@ def __format__(self, format): # Test multiple format specs in same raw f-string self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + def test_gh139516(self): + with temp_cwd(): + script = 'script.py' + with open(script, 'wb') as f: + f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode()) + assert_python_ok(script) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index e9a9ee4469d877..9aff15eb530845 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1212,6 +1212,23 @@ def test_multiline_non_ascii_fstring_with_expr(self): FSTRING_END "\'\'\'" (3, 1) (3, 4) """) + # gh-139516, the '\n' is explicit to ensure no trailing whitespace which would invalidate the test + self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + OP '{' (1, 2) (1, 3) + NAME 'f' (1, 3) (1, 4) + OP '(' (1, 4) (1, 5) + NAME 'a' (1, 5) (1, 6) + OP '=' (1, 6) (1, 7) + NAME 'lambda' (1, 7) (1, 13) + OP ':' (1, 13) (1, 14) + STRING "\'à\'" (1, 15) (1, 18) + NL '\\n' (1, 18) (1, 
19) + OP ')' (2, 0) (2, 1) + OP '}' (2, 1) (2, 2) + FSTRING_END \'"\' (2, 2) (2, 3) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst b/Misc/NEWS.d/next/Core and Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst new file mode 100644 index 00000000000000..a709112306025f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst @@ -0,0 +1 @@ +Fix lambda colon erroneously starting format spec in f-string in tokenizer. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 384239bd414c38..66a7cbb769aa46 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1291,7 +1291,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c)); } - if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) { + if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) { current_tok->f_string_debug = 1; } diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index d8791d89dd02db..7df24d571d558e 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -10,6 +10,8 @@ #define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0) #define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0) +#define INSIDE_FSTRING_EXPR_AT_TOP(tok) \ + (tok->curly_bracket_depth - tok->curly_bracket_expr_start_depth == 1) enum decoding_state { STATE_INIT,