From 5b013d9ed0d4256dc49c491c50e5d4c40484a74b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 4 Aug 2025 14:38:39 +0200 Subject: [PATCH 1/6] initial fix --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 ++ Lib/_pyrepl/console.py | 13 ++++ Lib/test/test_pyrepl/test_interact.py | 9 +++ Modules/clinic/symtablemodule.c.h | 72 ++++++++++++++++--- Modules/symtablemodule.c | 17 ++++- 8 files changed, 107 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 5e7dda3a3715a1..cdac16fc94fec0 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -950,6 +950,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fdel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(feature_version)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fget)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fields)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(file)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6908cbf78f349e..fab94bc20f4c94 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -441,6 +441,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(fd) STRUCT_FOR_ID(fd2) STRUCT_FOR_ID(fdel) + STRUCT_FOR_ID(feature_version) STRUCT_FOR_ID(fget) STRUCT_FOR_ID(fields) STRUCT_FOR_ID(file) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index da2ed7422c9deb..a21be9bb3123d1 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -948,6 +948,7 @@ extern "C" { INIT_ID(fd), \ INIT_ID(fd2), \ INIT_ID(fdel), \ + INIT_ID(feature_version), \ INIT_ID(fget), \ INIT_ID(fields), \ INIT_ID(file), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b1f411945e7856..7089e6a099d3c0 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1552,6 +1552,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(feature_version); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(fget); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index e0535d50396316..69eb08a60aada5 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -20,10 +20,12 @@ from __future__ import annotations import _colorize +import _symtable from abc import ABC, abstractmethod import ast import code +import codeop import linecache from dataclasses import dataclass, field import os.path @@ -211,6 +213,17 @@ def runsource(self, source, filename="", symbol="single"): except (OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False + + try: + # validate stuff that cannot be validated with AST parsing only + flags = self.compile.compiler.flags + flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT + flags &= ~codeop.PyCF_ALLOW_INCOMPLETE_INPUT + _symtable.symtable(source, filename, "exec", flags=flags) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: *_, last_stmt = tree.body for stmt in tree.body: diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index 8c0eeab6dcae96..49ea556c831f36 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -131,6 +131,15 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): console.runsource(source) mock_showsyntaxerror.assert_called_once() + def test_runsource_shows_syntax_error_for_failed_symtable_checks(self): + # Some checks cannot be performed by AST parsing only. + # See https://github.com/python/cpython/issues/137376. + console = InteractiveColoredConsole() + source = "x = 1; global x; x = 2" + with patch.object(console, "showsyntaxerror") as mock_showsyntaxerror: + console.runsource(source) + mock_showsyntaxerror.assert_called_once() + def test_runsource_survives_null_bytes(self): console = InteractiveColoredConsole() source = "\x00\n" diff --git a/Modules/clinic/symtablemodule.c.h b/Modules/clinic/symtablemodule.c.h index 2ecd3afc00d2be..9facec3fac44c9 100644 --- a/Modules/clinic/symtablemodule.c.h +++ b/Modules/clinic/symtablemodule.c.h @@ -2,30 +2,69 @@ preserve [clinic start generated code]*/ -#include "pycore_modsupport.h" // _PyArg_CheckPositional() +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_symtable_symtable__doc__, -"symtable($module, source, filename, startstr, /)\n" +"symtable($module, source, filename, startstr, /, *, flags=0,\n" +" feature_version=0)\n" "--\n" "\n" "Return symbol and scope dictionaries used internally by compiler."); #define _SYMTABLE_SYMTABLE_METHODDEF \ - {"symtable", _PyCFunction_CAST(_symtable_symtable), METH_FASTCALL, _symtable_symtable__doc__}, + {"symtable", _PyCFunction_CAST(_symtable_symtable), METH_FASTCALL|METH_KEYWORDS, _symtable_symtable__doc__}, static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, - PyObject *filename, const char *startstr); + PyObject *filename, const char *startstr, int flags, + int feature_version); static PyObject * -_symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(flags), &_Py_ID(feature_version), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "", "", "flags", "feature_version", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "symtable", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3; PyObject *source; PyObject *filename; const char *startstr; + int flags = 0; + int feature_version = 0; - if (!_PyArg_CheckPositional("symtable", nargs, 3, 3)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 3, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { goto exit; } source = args[0]; @@ -45,9 +84,26 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } - return_value = _symtable_symtable_impl(module, source, filename, startstr); + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[3]) { + flags = PyLong_AsInt(args[3]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + feature_version = PyLong_AsInt(args[4]); + if (feature_version == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_kwonly: + return_value = _symtable_symtable_impl(module, source, filename, startstr, flags, feature_version); exit: return return_value; } -/*[clinic end generated code: output=931964a76a72f850 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=71571c035470d288 input=a9049054013a1b77]*/ diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index d0d5223e5acea8..9b94d6585b5916 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -16,14 +16,18 @@ _symtable.symtable filename: object(converter='PyUnicode_FSDecoder') startstr: str / + * + flags: int = 0 + feature_version: int = 0 Return symbol and scope dictionaries used internally by compiler. [clinic start generated code]*/ static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, - PyObject *filename, const char *startstr) -/*[clinic end generated code: output=59eb0d5fc7285ac4 input=9dd8a50c0c36a4d7]*/ + PyObject *filename, const char *startstr, int flags, + int feature_version) +/*[clinic end generated code: output=1e766ac3387e156a input=5ccfb94e8a19a975]*/ { struct symtable *st; PyObject *t; @@ -31,7 +35,14 @@ _symtable_symtable_impl(PyObject *module, PyObject *source, PyCompilerFlags cf = _PyCompilerFlags_INIT; PyObject *source_copy = NULL; - cf.cf_flags = PyCF_SOURCE_IS_UTF8; + cf.cf_flags = flags | PyCF_SOURCE_IS_UTF8; + if (feature_version >= 0 && (flags & PyCF_ONLY_AST)) { + cf.cf_feature_version = feature_version; + } + if (flags & ~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_COMPILE_MASK)) { + PyErr_SetString(PyExc_ValueError, "_symtable.symtable(): unrecognised flags"); + return NULL; + } const char *str = _Py_SourceAsString(source, "symtable", "string or bytes", &cf, &source_copy); if (str == NULL) { From 69bd25b85b40b8e67076fbaa463cd28127ae4867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 4 Aug 2025 14:40:53 +0200 Subject: [PATCH 2/6] blurb --- .../Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst diff --git a/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst b/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst new file mode 100644 index 00000000000000..4457436b75c524 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst @@ -0,0 +1,3 @@ +Ensure that :exc:`SyntaxError` is raised when given incorrect REPL inputs +that are only detected when processing symbol tables. Patch by Bénédikt +Tran. From 64ff638a3084c4090dd4bfc63c85b8257221b1cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 4 Aug 2025 17:01:31 +0200 Subject: [PATCH 3/6] fix mypy lint --- Lib/_pyrepl/console.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 69eb08a60aada5..2b0b396cfea663 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -20,7 +20,7 @@ from __future__ import annotations import _colorize -import _symtable +import _symtable # type: ignore[import-not-found] from abc import ABC, abstractmethod import ast @@ -214,11 +214,11 @@ def runsource(self, source, filename="", symbol="single"): self.showsyntaxerror(filename, source=source) return False + # validate stuff that cannot be validated with AST parsing only + flags = self.compile.compiler.flags + flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT + flags &= ~codeop.PyCF_ALLOW_INCOMPLETE_INPUT try: - # validate stuff that cannot be validated with AST parsing only - flags = self.compile.compiler.flags - flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT - flags &= ~codeop.PyCF_ALLOW_INCOMPLETE_INPUT _symtable.symtable(source, filename, "exec", flags=flags) except (SyntaxError, OverflowError, ValueError): self.showsyntaxerror(filename, source=source) From 7f16b4bd721aaa20809f20033857075e75a380d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:28:32 +0200 Subject: [PATCH 4/6] improve tests --- Lib/test/test_pyrepl/test_interact.py | 62 +++++++++++++++++---------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index 49ea556c831f36..8e0db45e00198b 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -5,7 +5,7 @@ from unittest.mock import patch from textwrap import dedent -from test.support import force_not_colorized +from test.support import captured_stdout, force_not_colorized from _pyrepl.console import InteractiveColoredConsole from _pyrepl.simple_interact import _more_lines @@ -134,11 +134,7 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): def test_runsource_shows_syntax_error_for_failed_symtable_checks(self): # Some checks cannot be performed by AST parsing only. # See https://github.com/python/cpython/issues/137376. - console = InteractiveColoredConsole() - source = "x = 1; global x; x = 2" - with patch.object(console, "showsyntaxerror") as mock_showsyntaxerror: - console.runsource(source) - mock_showsyntaxerror.assert_called_once() + self._test_runsource_error("x = 1; global x; x = 2") def test_runsource_survives_null_bytes(self): console = InteractiveColoredConsole() @@ -162,26 +158,46 @@ def test_no_active_future(self): self.assertEqual(f.getvalue(), "{'x': }\n") def test_future_annotations(self): - console = InteractiveColoredConsole() - source = dedent("""\ - from __future__ import annotations - def g(x: int): ... - print(g.__annotations__) - """) - f = io.StringIO() - with contextlib.redirect_stdout(f): - result = console.runsource(source) - self.assertFalse(result) - self.assertEqual(f.getvalue(), "{'x': 'int'}\n") + self._test_runsource_future( + "from __future__ import annotations", + ["def g(x: int): ...", "print(g.__annotations__)"], + "{'x': 'int'}\n", + ) def test_future_barry_as_flufl(self): + self._test_runsource_future( + "from __future__ import barry_as_FLUFL", + ["""print("black" <> 'blue')"""], + "True\n", + ) + + def _test_runsource_error(self, buggy_source): console = InteractiveColoredConsole() - f = io.StringIO() - with contextlib.redirect_stdout(f): - result = console.runsource("from __future__ import barry_as_FLUFL\n") - result = console.runsource("""print("black" <> 'blue')\n""") - self.assertFalse(result) - self.assertEqual(f.getvalue(), "True\n") + with patch.object(console, "showsyntaxerror") as handler: + result = console.runsource(buggy_source) + handler.assert_called_once() + + def _test_runsource_future(self, future_statement, statements, expected): + """Run future_statement + statements. + + This checks whether a standalone future statement remains active + for the entire session lifetime. + """ + with self.subTest("standalone source"): + console = InteractiveColoredConsole() + source = "\n".join([future_statement, *statements]) + with captured_stdout() as stdout: + result = console.runsource(source) + self.assertFalse(result) + self.assertEqual(stdout.getvalue(), expected) + + with self.subTest("__future__ executed separtely"): + console = InteractiveColoredConsole() + with captured_stdout() as stdout: + result = console.runsource(future_statement) + result = console.runsource("\n".join(statements)) + self.assertFalse(result) + self.assertEqual(stdout.getvalue(), expected) class TestMoreLines(unittest.TestCase): From 61f309b9314d82c603d9d7516c3cf3a0d9461f32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:28:40 +0200 Subject: [PATCH 5/6] improve comments --- Lib/_pyrepl/console.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 2b0b396cfea663..490d00c3ea9569 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -214,8 +214,15 @@ def runsource(self, source, filename="", symbol="single"): self.showsyntaxerror(filename, source=source) return False - # validate stuff that cannot be validated with AST parsing only - flags = self.compile.compiler.flags + # Validate stuff that cannot be validated with AST parsing only, + # such as assigning to a variable before a global declaration, + # + # While runsource("x = 1; global x") would fail, runsource("x = 1") + # followed by runsource("global x") would still work since preventing + # this requires the REPL to remember the global names whose number + # grows faster than in a regular program, which then becomes less + # efficient or relevant for the user. + flags = self.compile.compiler.flags # may contain active futures flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT flags &= ~codeop.PyCF_ALLOW_INCOMPLETE_INPUT try: From 9cebe9d1fa225725e29e8e52a8db903a6e29fecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:28:46 +0200 Subject: [PATCH 6/6] fix feature_version handling --- Modules/clinic/symtablemodule.c.h | 6 +++--- Modules/symtablemodule.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/clinic/symtablemodule.c.h b/Modules/clinic/symtablemodule.c.h index 9facec3fac44c9..a7f6ed0fcf459b 100644 --- a/Modules/clinic/symtablemodule.c.h +++ b/Modules/clinic/symtablemodule.c.h @@ -10,7 +10,7 @@ preserve PyDoc_STRVAR(_symtable_symtable__doc__, "symtable($module, source, filename, startstr, /, *, flags=0,\n" -" feature_version=0)\n" +" feature_version=-1)\n" "--\n" "\n" "Return symbol and scope dictionaries used internally by compiler."); @@ -60,7 +60,7 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py PyObject *filename; const char *startstr; int flags = 0; - int feature_version = 0; + int feature_version = -1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 3, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -106,4 +106,4 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py exit: return return_value; } -/*[clinic end generated code: output=71571c035470d288 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f30a99893fa9de05 input=a9049054013a1b77]*/ diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index 9b94d6585b5916..c4ff5a1a08fc97 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -18,7 +18,7 @@ _symtable.symtable / * flags: int = 0 - feature_version: int = 0 + feature_version: int = -1 Return symbol and scope dictionaries used internally by compiler. [clinic start generated code]*/ @@ -27,7 +27,7 @@ static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, PyObject *filename, const char *startstr, int flags, int feature_version) -/*[clinic end generated code: output=1e766ac3387e156a input=5ccfb94e8a19a975]*/ +/*[clinic end generated code: output=1e766ac3387e156a input=03e9deda7ab5a9d7]*/ { struct symtable *st; PyObject *t;