From 9e7d1022b2e4fb4508496815ac4695c747fb3564 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 1 May 2025 11:06:57 +0300 Subject: [PATCH 1/3] gh-133197: Improve error message for incompatible string / bytes prefixes --- Lib/test/test_grammar.py | 21 ++++++ Lib/test/test_syntax.py | 70 ++++++++++++++++-- ...-05-01-11-06-29.gh-issue-133197.BHjfh4.rst | 2 + Parser/lexer/lexer.c | 72 ++++++++++++++----- 4 files changed, 141 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index c0681bccd9ec32..c39565144bf7f4 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -216,6 +216,27 @@ def test_string_literals(self): ' self.assertEqual(x, y) + def test_string_prefixes(self): + def check(s): + parsed = eval(s) + self.assertIs(type(parsed), str) + self.assertGreater(len(parsed), 0) + + check("u'abc'") + check("r'abc\t'") + check("rf'abc\a {1 + 1}'") + check("fr'abc\a {1 + 1}'") + + def test_bytes_prefixes(self): + def check(s): + parsed = eval(s) + self.assertIs(type(parsed), bytes) + self.assertGreater(len(parsed), 0) + + check("b'abc'") + check("br'abc\t'") + check("rb'abc\a'") + def test_ellipsis(self): x = ... self.assertTrue(x is Ellipsis) diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 7ef4b735fcb805..20d5484315febd 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1877,21 +1877,77 @@ Traceback (most recent call last): SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' instead of '='? ->>> ft'abc' +>>> ub'' Traceback (most recent call last): -SyntaxError: can't use 'f' and 't' string prefixes together +SyntaxError: 'u' and 'b' prefixes are incompatible ->>> tf"{x=}" +>>> bu"привет" Traceback (most recent call last): -SyntaxError: can't use 'f' and 't' string prefixes together +SyntaxError: 'u' and 'b' prefixes are incompatible ->>> tb'' +>>> ur'' +Traceback (most recent call last): +SyntaxError: 'u' and 'r' prefixes are incompatible + +>>> ru"\t" +Traceback (most recent call last): +SyntaxError: 'u' and 'r' prefixes are incompatible + +>>> uf'{1 + 1}' +Traceback (most recent call last): +SyntaxError: 'u' and 'f' prefixes are incompatible + +>>> fu"" +Traceback (most recent call last): +SyntaxError: 'u' and 'f' prefixes are incompatible + +>>> ut'{1}' +Traceback (most recent call last): +SyntaxError: 'u' and 't' prefixes are incompatible + +>>> tu"234" +Traceback (most recent call last): +SyntaxError: 'u' and 't' prefixes are incompatible + +>>> bf'{x!r}' +Traceback (most recent call last): +SyntaxError: 'b' and 'f' prefixes are incompatible + +>>> fb"text" Traceback (most recent call last): -SyntaxError: can't use 'b' and 't' string prefixes together +SyntaxError: 'b' and 'f' prefixes are incompatible >>> bt"text" Traceback (most recent call last): -SyntaxError: can't use 'b' and 't' string prefixes together +SyntaxError: 'b' and 't' prefixes are incompatible + +>>> tb'' +Traceback (most recent call last): +SyntaxError: 'b' and 't' prefixes are incompatible + +>>> tf"{0.3:.02f}" +Traceback (most recent call last): +SyntaxError: 'f' and 't' prefixes are incompatible + +>>> ft'{x=}' +Traceback (most recent call last): +SyntaxError: 'f' and 't' prefixes are incompatible + +>>> tfu"{x=}" +Traceback (most recent call last): +SyntaxError: 'u' and 'f' prefixes are incompatible + +>>> turf"{x=}" +Traceback (most recent call last): +SyntaxError: 'u' and 'r' prefixes are incompatible + +>>> burft"{x=}" +Traceback (most recent call last): +SyntaxError: 'u' and 'b' prefixes are incompatible + +>>> brft"{x=}" +Traceback (most recent call last): +SyntaxError: 'b' and 'f' prefixes are incompatible >>> t'{x}' = 42 Traceback (most recent call last): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst new file mode 100644 index 00000000000000..009bc3760535d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst @@ -0,0 +1,2 @@ +Improve :exc:`SyntaxError` error messages for incompatible string / bytes +prefixes. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 98adf7447c5626..459d30d608e150 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -404,6 +404,50 @@ tok_continuation_line(struct tok_state *tok) { return c; } +static int +maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok, + int saw_b, int saw_r, int saw_u, + int saw_f, int saw_t) { + // Return -1 when there's no error, + // return token_type >= 0 when there's an error. + // Supported: rb, rf, rt (in any order) + // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order) + +#define MAKE_SYNTAX_ERROR(PREFIX1, PREFIX2) \ + _PyTokenizer_syntaxerror_known_range( \ + tok, (int)(tok->start + 1 - tok->line_start), \ + (int)(tok->cur - tok->line_start), \ + "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible") + + if (saw_u && saw_b) { + return MAKE_SYNTAX_ERROR("u", "b"); + } + if (saw_u && saw_r) { + return MAKE_SYNTAX_ERROR("u", "r"); + } + if (saw_u && saw_f) { + return MAKE_SYNTAX_ERROR("u", "f"); + } + if (saw_u && saw_t) { + return MAKE_SYNTAX_ERROR("u", "t"); + } + + if (saw_b && saw_f) { + return MAKE_SYNTAX_ERROR("b", "f"); + } + if (saw_b && saw_t) { + return MAKE_SYNTAX_ERROR("b", "t"); + } + + if (saw_f && saw_t) { + return MAKE_SYNTAX_ERROR("f", "t"); + } + return -1; + +#undef MAKE_SYNTAX_ERROR + +} + static int tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token) { @@ -648,22 +692,22 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Process the various legal combinations of b"", r"", u"", and f"". */ int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0; while (1) { - if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) + if (!saw_b && (c == 'b' || c == 'B')) { saw_b = 1; + } /* Since this is a backwards compatibility support literal we don't want to support it in arbitrary order like byte literals. */ - else if (!(saw_b || saw_u || saw_r || saw_f || saw_t) - && (c == 'u'|| c == 'U')) { + else if (!saw_u && (c == 'u'|| c == 'U')) { saw_u = 1; } /* ur"" and ru"" are not supported */ - else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { + else if (!saw_r && (c == 'r' || c == 'R')) { saw_r = 1; } - else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { + else if (!saw_f && (c == 'f' || c == 'F')) { saw_f = 1; } - else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) { + else if (!saw_t && (c == 't' || c == 'T')) { saw_t = 1; } else { @@ -671,17 +715,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t } c = tok_nextc(tok); if (c == '"' || c == '\'') { - if (saw_b && saw_t) { - return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range( - tok, (int)(tok->start + 1 - tok->line_start), - (int)(tok->cur - tok->line_start), - "can't use 'b' and 't' string prefixes together")); - } - if (saw_f && saw_t) { - return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range( - tok, (int)(tok->start + 1 - tok->line_start), - (int)(tok->cur - tok->line_start), - "can't use 'f' and 't' string prefixes together")); + // Raise error on incompatible string prefixes: + int err = maybe_raise_syntax_error_for_string_prefixes( + tok, saw_b, saw_r, saw_u, saw_f, saw_t); + if (err >= 0) { + return MAKE_TOKEN(err); } // Handle valid f or t string creation: From 0615595f52a02d7dd65f814f479cc458dfc0c040 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 1 May 2025 11:21:26 +0300 Subject: [PATCH 2/3] Fix test --- Lib/test/test_fstring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index a10d1fd5fd2b1f..dd58e032a8befe 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1304,7 +1304,7 @@ def test_invalid_string_prefixes(self): "Bf''", "BF''",] double_quote_cases = [case.replace("'", '"') for case in single_quote_cases] - self.assertAllRaise(SyntaxError, 'invalid syntax', + self.assertAllRaise(SyntaxError, 'prefixes are incompatible', single_quote_cases + double_quote_cases) def test_leading_trailing_spaces(self): From 26653c541af11fe8c166f5f58e02bec37136b227 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 1 May 2025 14:08:12 +0300 Subject: [PATCH 3/3] Address review --- Parser/lexer/lexer.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 459d30d608e150..4d10bccf0a53f2 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -408,44 +408,45 @@ static int maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok, int saw_b, int saw_r, int saw_u, int saw_f, int saw_t) { - // Return -1 when there's no error, - // return token_type >= 0 when there's an error. // Supported: rb, rf, rt (in any order) // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order) -#define MAKE_SYNTAX_ERROR(PREFIX1, PREFIX2) \ - _PyTokenizer_syntaxerror_known_range( \ - tok, (int)(tok->start + 1 - tok->line_start), \ - (int)(tok->cur - tok->line_start), \ - "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible") +#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2) \ + do { \ + (void)_PyTokenizer_syntaxerror_known_range( \ + tok, (int)(tok->start + 1 - tok->line_start), \ + (int)(tok->cur - tok->line_start), \ + "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \ + return -1; \ + } while (0) if (saw_u && saw_b) { - return MAKE_SYNTAX_ERROR("u", "b"); + RETURN_SYNTAX_ERROR("u", "b"); } if (saw_u && saw_r) { - return MAKE_SYNTAX_ERROR("u", "r"); + RETURN_SYNTAX_ERROR("u", "r"); } if (saw_u && saw_f) { - return MAKE_SYNTAX_ERROR("u", "f"); + RETURN_SYNTAX_ERROR("u", "f"); } if (saw_u && saw_t) { - return MAKE_SYNTAX_ERROR("u", "t"); + RETURN_SYNTAX_ERROR("u", "t"); } if (saw_b && saw_f) { - return MAKE_SYNTAX_ERROR("b", "f"); + RETURN_SYNTAX_ERROR("b", "f"); } if (saw_b && saw_t) { - return MAKE_SYNTAX_ERROR("b", "t"); + RETURN_SYNTAX_ERROR("b", "t"); } if (saw_f && saw_t) { - return MAKE_SYNTAX_ERROR("f", "t"); + RETURN_SYNTAX_ERROR("f", "t"); } - return -1; -#undef MAKE_SYNTAX_ERROR +#undef RETURN_SYNTAX_ERROR + return 0; } static int @@ -716,10 +717,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); if (c == '"' || c == '\'') { // Raise error on incompatible string prefixes: - int err = maybe_raise_syntax_error_for_string_prefixes( + int status = maybe_raise_syntax_error_for_string_prefixes( tok, saw_b, saw_r, saw_u, saw_f, saw_t); - if (err >= 0) { - return MAKE_TOKEN(err); + if (status < 0) { + return MAKE_TOKEN(ERRORTOKEN); } // Handle valid f or t string creation: