From a4da84285c21c90d10702f07d094d9096990f8c7 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 12 Sep 2024 22:42:49 +0100 Subject: [PATCH 1/9] gh-124022: Fix extrenous NOP when class docstring is removed in interactive mode --- Lib/test/test_compile.py | 18 ++++++++++++++++++ Python/codegen.c | 8 ++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index f22761f0a3af9f..2dd70665e3b841 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -926,6 +926,24 @@ class C: dis.dis(code) self.assertNotIn('NOP' , output.getvalue()) + @support.cpython_only + def test_docstring_removed_no_NOP_interactive_mode(self): + # See gh-124022 + src = textwrap.dedent(""" + class C: + "docstring" + x = 42 + """) + for opt in [-1, 0, 1, 2]: + with self.subTest(opt=opt): + code = compile(src, "", "single", optimize=opt) + # make sure the bytecode doesn't have any instruction + # on line 3, where the docstring is, but we have + # instructions for the assignment in line 4. + lines = [line for (_, _, line) in code.co_consts[0].co_lines()] + self.assertNotIn(3, lines) + self.assertIn(4, lines) + def test_dont_merge_constants(self): # Issue #25843: compile() must not merge constants which are equal # but have a different type. diff --git a/Python/codegen.c b/Python/codegen.c index 2ca5db1fc6ad34..b6fc944ec3fb4d 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -758,10 +758,10 @@ _PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts) return SUCCESS; } Py_ssize_t first_instr = 0; - if (!IS_INTERACTIVE(c)) { - PyObject *docstring = _PyAST_GetDocString(stmts); - if (docstring) { - first_instr = 1; + PyObject *docstring = _PyAST_GetDocString(stmts); + if (docstring) { + first_instr = 1; + if (!IS_INTERACTIVE(c)) { /* set docstring */ assert(OPTIMIZATION_LEVEL(c) < 2); PyObject *cleandoc = _PyCompile_CleanDoc(docstring); From e3ed9f4b88dc733945a873b06d193eb8f8e84b69 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 21:53:32 +0000 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst new file mode 100644 index 00000000000000..f29e40d5309fdd --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst @@ -0,0 +1 @@ +Fix bug where a class that has a docstring, which is compiled in interactive mode, generates bytecode that contains a NOP with the line of the docstring. From 66269738f590f6c7ac7c00bea28b1f533d5ae153 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 12 Sep 2024 23:24:03 +0100 Subject: [PATCH 3/9] don't do anything different in interactive mode --- Lib/test/test_compile.py | 47 ++++++++++++++++++++-------------------- Python/codegen.c | 24 ++++++++++---------- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 2dd70665e3b841..80d52b0ef8e0d3 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -902,6 +902,22 @@ def with_const_expression(): self.assertIsNone(ns['with_fstring'].__doc__) self.assertIsNone(ns['with_const_expression'].__doc__) + @support.cpython_only + def test_docstring_interactive_mode(self): + src = textwrap.dedent(""" + def with_docstring(): + "docstring" + """) + for opt in [0, 1, 2]: + with self.subTest(opt=opt): + code = compile(src, "", "single", optimize=opt) + ns = {} + exec(code, ns) + if opt < 2: + self.assertEqual(ns['with_docstring'].__doc__, "docstring") + else: + self.assertIsNone(ns['with_docstring'].__doc__) + @support.cpython_only def test_docstring_omitted(self): # See gh-115347 @@ -919,30 +935,13 @@ class C: return h """) for opt in [-1, 0, 1, 2]: - with self.subTest(opt=opt): - code = compile(src, "", "exec", optimize=opt) - output = io.StringIO() - with contextlib.redirect_stdout(output): - dis.dis(code) - self.assertNotIn('NOP' , output.getvalue()) - - @support.cpython_only - def test_docstring_removed_no_NOP_interactive_mode(self): - # See gh-124022 - src = textwrap.dedent(""" - class C: - "docstring" - x = 42 - """) - for opt in [-1, 0, 1, 2]: - with self.subTest(opt=opt): - code = compile(src, "", "single", optimize=opt) - # make sure the bytecode doesn't have any instruction - # on line 3, where the docstring is, but we have - # instructions for the assignment in line 4. - lines = [line for (_, _, line) in code.co_consts[0].co_lines()] - self.assertNotIn(3, lines) - self.assertIn(4, lines) + for mode in ["exec", "single"]: + with self.subTest(opt=opt, mode=mode): + code = compile(src, "", mode, optimize=opt) + output = io.StringIO() + with contextlib.redirect_stdout(output): + dis.dis(code) + self.assertNotIn('NOP' , output.getvalue()) def test_dont_merge_constants(self): # Issue #25843: compile() must not merge constants which are equal diff --git a/Python/codegen.c b/Python/codegen.c index b6fc944ec3fb4d..e55764430a8d5c 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -761,20 +761,18 @@ _PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts) PyObject *docstring = _PyAST_GetDocString(stmts); if (docstring) { first_instr = 1; - if (!IS_INTERACTIVE(c)) { - /* set docstring */ - assert(OPTIMIZATION_LEVEL(c) < 2); - PyObject *cleandoc = _PyCompile_CleanDoc(docstring); - if (cleandoc == NULL) { - return ERROR; - } - stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); - assert(st->kind == Expr_kind); - location loc = LOC(st->v.Expr.value); - ADDOP_LOAD_CONST(c, loc, cleandoc); - Py_DECREF(cleandoc); - RETURN_IF_ERROR(codegen_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); + /* set docstring */ + assert(OPTIMIZATION_LEVEL(c) < 2); + PyObject *cleandoc = _PyCompile_CleanDoc(docstring); + if (cleandoc == NULL) { + return ERROR; } + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); + assert(st->kind == Expr_kind); + location loc = LOC(st->v.Expr.value); + ADDOP_LOAD_CONST(c, loc, cleandoc); + Py_DECREF(cleandoc); + RETURN_IF_ERROR(codegen_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } for (Py_ssize_t i = first_instr; i < asdl_seq_LEN(stmts); i++) { VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i)); From db7f999061a78b90d8bbf0b3a554e2f44b76a100 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:25:36 +0100 Subject: [PATCH 4/9] update news --- .../2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst index f29e40d5309fdd..f390c4dd346fb0 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst @@ -1 +1 @@ -Fix bug where a class that has a docstring, which is compiled in interactive mode, generates bytecode that contains a NOP with the line of the docstring. +Fix bug where docstring it removed from classes in interactive mode. From 16083e555a3669acad7763e3eeec76bf145a066f Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:31:01 +0100 Subject: [PATCH 5/9] Apply suggestions from code review Co-authored-by: Jelle Zijlstra --- Lib/test/test_compile.py | 4 ++-- .../2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 80d52b0ef8e0d3..844eefff9386af 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -935,13 +935,13 @@ class C: return h """) for opt in [-1, 0, 1, 2]: - for mode in ["exec", "single"]: + for mode in ["exec", "single"]: with self.subTest(opt=opt, mode=mode): code = compile(src, "", mode, optimize=opt) output = io.StringIO() with contextlib.redirect_stdout(output): dis.dis(code) - self.assertNotIn('NOP' , output.getvalue()) + self.assertNotIn('NOP', output.getvalue()) def test_dont_merge_constants(self): # Issue #25843: compile() must not merge constants which are equal diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst index f390c4dd346fb0..90a77a5346d22b 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-12-21-53-26.gh-issue-124022.fQzUiW.rst @@ -1 +1 @@ -Fix bug where docstring it removed from classes in interactive mode. +Fix bug where docstring is removed from classes in interactive mode. From 0535265f8c8f813d294eede2a5a2460405702688 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 12 Sep 2024 23:34:25 +0100 Subject: [PATCH 6/9] test both func and class --- Lib/test/test_compile.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 844eefff9386af..736eff35c1d5f2 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -904,19 +904,25 @@ def with_const_expression(): @support.cpython_only def test_docstring_interactive_mode(self): - src = textwrap.dedent(""" - def with_docstring(): + srcs = [ + """def with_docstring(): "docstring" - """) + """, + """class with_docstring: + "docstring" + """, + ] + for opt in [0, 1, 2]: - with self.subTest(opt=opt): - code = compile(src, "", "single", optimize=opt) - ns = {} - exec(code, ns) - if opt < 2: - self.assertEqual(ns['with_docstring'].__doc__, "docstring") - else: - self.assertIsNone(ns['with_docstring'].__doc__) + for src in srcs: + with self.subTest(opt=opt, src=src): + code = compile(textwrap.dedent(src), "", "single", optimize=opt) + ns = {} + exec(code, ns) + if opt < 2: + self.assertEqual(ns['with_docstring'].__doc__, "docstring") + else: + self.assertIsNone(ns['with_docstring'].__doc__) @support.cpython_only def test_docstring_omitted(self): From 53a9e48f516772425b5d3df1af74b2c4dc44a69e Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Thu, 12 Sep 2024 22:35:07 -0700 Subject: [PATCH 7/9] Fix bug where string literals in REPL got interpreted as docstrings --- Include/internal/pycore_compile.h | 3 ++- Python/codegen.c | 37 ++++++++++++++++++------------- Python/compile.c | 4 ++-- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index 51db6fc608caf7..5359f2d650e2a2 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -172,7 +172,8 @@ int _PyCompile_AddDeferredAnnotaion(struct _PyCompiler *c, stmt_ty s); int _PyCodegen_AddReturnAtEnd(struct _PyCompiler *c, int addNone); int _PyCodegen_EnterAnonymousScope(struct _PyCompiler* c, mod_ty mod); int _PyCodegen_Expression(struct _PyCompiler *c, expr_ty e); -int _PyCodegen_Body(struct _PyCompiler *c, _Py_SourceLocation loc, asdl_stmt_seq *stmts); +int _PyCodegen_Body(struct _PyCompiler *c, _Py_SourceLocation loc, asdl_stmt_seq *stmts, + bool is_interactive); /* Utility for a number of growing arrays used in the compiler */ int _PyCompile_EnsureArrayLargeEnough( diff --git a/Python/codegen.c b/Python/codegen.c index e55764430a8d5c..ee83aa41c55c93 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -746,7 +746,7 @@ _PyCodegen_Expression(compiler *c, expr_ty e) and for annotations. */ int -_PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts) +_PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts, bool is_interactive) { /* If from __future__ import annotations is active, * every annotated class and module should have __annotations__. @@ -758,21 +758,26 @@ _PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts) return SUCCESS; } Py_ssize_t first_instr = 0; - PyObject *docstring = _PyAST_GetDocString(stmts); - if (docstring) { - first_instr = 1; - /* set docstring */ - assert(OPTIMIZATION_LEVEL(c) < 2); - PyObject *cleandoc = _PyCompile_CleanDoc(docstring); - if (cleandoc == NULL) { - return ERROR; + // If this is a top-level interactive statement, we don't want to + // extract a docstring, because then writing a string literal in the REPL + // would get interpreted as a docstring. + if (!is_interactive) { + PyObject *docstring = _PyAST_GetDocString(stmts); + if (docstring) { + first_instr = 1; + /* set docstring */ + assert(OPTIMIZATION_LEVEL(c) < 2); + PyObject *cleandoc = _PyCompile_CleanDoc(docstring); + if (cleandoc == NULL) { + return ERROR; + } + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); + assert(st->kind == Expr_kind); + location loc = LOC(st->v.Expr.value); + ADDOP_LOAD_CONST(c, loc, cleandoc); + Py_DECREF(cleandoc); + RETURN_IF_ERROR(codegen_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } - stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); - assert(st->kind == Expr_kind); - location loc = LOC(st->v.Expr.value); - ADDOP_LOAD_CONST(c, loc, cleandoc); - Py_DECREF(cleandoc); - RETURN_IF_ERROR(codegen_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } for (Py_ssize_t i = first_instr; i < asdl_seq_LEN(stmts); i++) { VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i)); @@ -1430,7 +1435,7 @@ codegen_class_body(compiler *c, stmt_ty s, int firstlineno) ADDOP_N_IN_SCOPE(c, loc, STORE_DEREF, &_Py_ID(__classdict__), cellvars); } /* compile the body proper */ - RETURN_IF_ERROR_IN_SCOPE(c, _PyCodegen_Body(c, loc, s->v.ClassDef.body)); + RETURN_IF_ERROR_IN_SCOPE(c, _PyCodegen_Body(c, loc, s->v.ClassDef.body, false)); PyObject *static_attributes = _PyCompile_StaticAttributesAsTuple(c); if (static_attributes == NULL) { _PyCompile_ExitScope(c); diff --git a/Python/compile.c b/Python/compile.c index d54c320babc2b7..e1d2c30944d132 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -790,13 +790,13 @@ compiler_codegen(compiler *c, mod_ty mod) switch (mod->kind) { case Module_kind: { asdl_stmt_seq *stmts = mod->v.Module.body; - RETURN_IF_ERROR(_PyCodegen_Body(c, start_location(stmts), stmts)); + RETURN_IF_ERROR(_PyCodegen_Body(c, start_location(stmts), stmts, false)); break; } case Interactive_kind: { c->c_interactive = 1; asdl_stmt_seq *stmts = mod->v.Interactive.body; - RETURN_IF_ERROR(_PyCodegen_Body(c, start_location(stmts), stmts)); + RETURN_IF_ERROR(_PyCodegen_Body(c, start_location(stmts), stmts, true)); break; } case Expression_kind: { From 6de4f83281cce0f940425eaaeb6610a2545e908d Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:05:33 +0100 Subject: [PATCH 8/9] shorter comment --- Python/codegen.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Python/codegen.c b/Python/codegen.c index ee83aa41c55c93..7062042b49c1ad 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -758,10 +758,7 @@ _PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts, bool is_interac return SUCCESS; } Py_ssize_t first_instr = 0; - // If this is a top-level interactive statement, we don't want to - // extract a docstring, because then writing a string literal in the REPL - // would get interpreted as a docstring. - if (!is_interactive) { + if (!is_interactive) { /* A string literal typed on the REPL prompt is not a docstring */ PyObject *docstring = _PyAST_GetDocString(stmts); if (docstring) { first_instr = 1; From 5f4c9402b15b94dd1154638341a1909cb0f8c581 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:13:00 +0100 Subject: [PATCH 9/9] even shorter --- Python/codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codegen.c b/Python/codegen.c index 7062042b49c1ad..5565d3011c465a 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -758,7 +758,7 @@ _PyCodegen_Body(compiler *c, location loc, asdl_stmt_seq *stmts, bool is_interac return SUCCESS; } Py_ssize_t first_instr = 0; - if (!is_interactive) { /* A string literal typed on the REPL prompt is not a docstring */ + if (!is_interactive) { /* A string literal on REPL prompt is not a docstring */ PyObject *docstring = _PyAST_GetDocString(stmts); if (docstring) { first_instr = 1;