From d00f7ba548b52dbc852fdcafab274db6f1fb4e68 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 22 Feb 2024 16:18:23 +0100 Subject: [PATCH 1/2] gh-115823: Calculate correctly error locations when dealing with implicit encodings --- Lib/test/test_exceptions.py | 1 + ...-02-22-16-17-53.gh-issue-115823.c1TreJ.rst | 3 +++ Parser/pegen_errors.c | 21 +++++++++---------- 3 files changed, 14 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index c7e76414ff0715..c5eff8ad8ccca1 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -301,6 +301,7 @@ def baz(): { 6 0="""''', 5, 13) + check('b"fooжжж"'.encode(), 1, 1, 1, 10) # Errors thrown by symtable.c check('x = [(yield i) for i in range(3)]', 1, 7) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst new file mode 100644 index 00000000000000..8cda4c9343d4d7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst @@ -0,0 +1,3 @@ +Properly calculate error ranges in the parser when raising +:exc:`SyntaxError` exceptions caused by invalid byte sequences. Patch by +Pablo Galindo diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index e15673d02dd3b0..d05499fafdaf72 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -369,20 +369,19 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t col_number = col_offset; Py_ssize_t end_col_number = end_col_offset; - if (p->tok->encoding != NULL) { - col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); - if (col_number < 0) { + col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); + if (col_number < 0) { + goto error; + } + if (end_col_number > 0) { + Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); + if (end_col_offset < 0) { goto error; - } - if (end_col_number > 0) { - Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); - if (end_col_offset < 0) { - goto error; - } else { - end_col_number = end_col_offset; - } + } else { + end_col_number = end_col_offset; } } + tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); if (!tmp) { goto error; From af747d5b4310f32cac173483e41035ed131c66e4 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 26 Feb 2024 13:32:04 +0100 Subject: [PATCH 2/2] fixup! gh-115823: Calculate correctly error locations when dealing with implicit encodings --- Parser/pegen_errors.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index d05499fafdaf72..e8f11a67e50fa0 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -373,12 +373,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, if (col_number < 0) { goto error; } - if (end_col_number > 0) { - Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); - if (end_col_offset < 0) { + + if (end_col_offset > 0) { + end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset); + if (end_col_number < 0) { goto error; - } else { - end_col_number = end_col_offset; } }