From 9748fb3867d9962d4cf3c37c371afe44c1f6f4fa Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 7 May 2025 18:47:42 +0300 Subject: [PATCH 001/989] Python 3.15.0a0 --- .github/ISSUE_TEMPLATE/bug.yml | 1 + .github/ISSUE_TEMPLATE/crash.yml | 1 + Doc/tutorial/interpreter.rst | 6 +- Doc/tutorial/stdlib.rst | 2 +- Doc/tutorial/stdlib2.rst | 2 +- Doc/whatsnew/3.15.rst | 156 +++++++++++++++++++++++++++++++ Include/patchlevel.h | 8 +- PC/pyconfig.h.in | 8 +- PCbuild/rt.bat | 2 +- README.rst | 4 +- configure.ac | 2 +- 11 files changed, 175 insertions(+), 17 deletions(-) create mode 100644 Doc/whatsnew/3.15.rst diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index 7b7810cf6965fd..da70710b7ecfa3 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -40,6 +40,7 @@ body: - "3.12" - "3.13" - "3.14" + - "3.15" - "CPython main branch" validations: required: true diff --git a/.github/ISSUE_TEMPLATE/crash.yml b/.github/ISSUE_TEMPLATE/crash.yml index 58da2dfe0c7354..470ad581367b10 100644 --- a/.github/ISSUE_TEMPLATE/crash.yml +++ b/.github/ISSUE_TEMPLATE/crash.yml @@ -33,6 +33,7 @@ body: - "3.12" - "3.13" - "3.14" + - "3.15" - "CPython main branch" validations: required: true diff --git a/Doc/tutorial/interpreter.rst b/Doc/tutorial/interpreter.rst index 02e7de77322e99..cd52607142485e 100644 --- a/Doc/tutorial/interpreter.rst +++ b/Doc/tutorial/interpreter.rst @@ -16,7 +16,7 @@ Unix shell's search path makes it possible to start it by typing the command: .. code-block:: text - python3.14 + python3.15 to the shell. [#]_ Since the choice of the directory where the interpreter lives is an installation option, other places are possible; check with your local @@ -97,8 +97,8 @@ before printing the first prompt: .. code-block:: shell-session - $ python3.14 - Python 3.14 (default, April 4 2024, 09:25:04) + $ python3.15 + Python 3.15 (default, May 7 2025, 15:46:04) [GCC 10.2.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index 4b3eef313e76d7..d83ecca270b160 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -15,7 +15,7 @@ operating system:: >>> import os >>> os.getcwd() # Return the current working directory - 'C:\\Python314' + 'C:\\Python315' >>> os.chdir('/server/accesslogs') # Change current working directory >>> os.system('mkdir today') # Run the command mkdir in the system shell 0 diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst index a2f96b34b2dead..678b71c9274c1c 100644 --- a/Doc/tutorial/stdlib2.rst +++ b/Doc/tutorial/stdlib2.rst @@ -279,7 +279,7 @@ applications include caching objects that are expensive to create:: Traceback (most recent call last): File "", line 1, in d['primary'] # entry was automatically removed - File "C:/python314/lib/weakref.py", line 46, in __getitem__ + File "C:/python315/lib/weakref.py", line 46, in __getitem__ o = self.data[key]() KeyError: 'primary' diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst new file mode 100644 index 00000000000000..41b2685d5d30fe --- /dev/null +++ b/Doc/whatsnew/3.15.rst @@ -0,0 +1,156 @@ + +**************************** + What's new in Python 3.15 +**************************** + +:Editor: TBD + +.. Rules for maintenance: + + * Anyone can add text to this document. Do not spend very much time + on the wording of your changes, because your text will probably + get rewritten to some degree. + + * The maintainer will go through Misc/NEWS periodically and add + changes; it's therefore more important to add your changes to + Misc/NEWS than to this file. + + * This is not a complete list of every single change; completeness + is the purpose of Misc/NEWS. Some changes I consider too small + or esoteric to include. If such a change is added to the text, + I'll just remove it. (This is another reason you shouldn't spend + too much time on writing your addition.) + + * If you want to draw your new text to the attention of the + maintainer, add 'XXX' to the beginning of the paragraph or + section. + + * It's OK to just add a fragmentary note about a change. For + example: "XXX Describe the transmogrify() function added to the + socket module." The maintainer will research the change and + write the necessary text. + + * You can comment out your additions if you like, but it's not + necessary (especially when a final release is some months away). + + * Credit the author of a patch or bugfix. Just the name is + sufficient; the e-mail address isn't necessary. + + * It's helpful to add the issue number as a comment: + + XXX Describe the transmogrify() function added to the socket + module. + (Contributed by P.Y. Developer in :gh:`12345`.) + + This saves the maintainer the effort of going through the VCS log + when researching a change. + +This article explains the new features in Python 3.15, compared to 3.14. + +For full details, see the :ref:`changelog `. + +.. note:: + + Prerelease users should be aware that this document is currently in draft + form. It will be updated substantially as Python 3.15 moves towards release, + so it's worth checking back even after reading earlier versions. + + +Summary --- release highlights +============================== + +.. This section singles out the most important changes in Python 3.15. + Brevity is key. + + +.. PEP-sized items next. + + + +New features +============ + + + +Other language changes +====================== + + + +New modules +=========== + +* None yet. + + +Improved modules +================ + +module_name +----------- + +* TODO + +.. Add improved modules above alphabetically, not here at the end. + +Optimizations +============= + +module_name +----------- + +* TODO + + + +Deprecated +========== + +* module_name: + TODO + + +.. Add deprecations above alphabetically, not here at the end. + +Removed +======= + +module_name +----------- + +* TODO + + +Porting to Python 3.15 +====================== + +This section lists previously described changes and other bugfixes +that may require changes to your code. + + +Build changes +============= + + +C API changes +============= + +New features +------------ + +* TODO +Porting to Python 3.15 +---------------------- + +* TODO + +Deprecated C APIs +----------------- + +* TODO + +.. Add C API deprecations above alphabetically, not here at the end. + +Removed C APIs +-------------- + diff --git a/Include/patchlevel.h b/Include/patchlevel.h index c9dd659046a367..532873b51e65bb 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -18,13 +18,13 @@ /* Version parsed out into numeric values */ /*--start constants--*/ #define PY_MAJOR_VERSION 3 -#define PY_MINOR_VERSION 14 +#define PY_MINOR_VERSION 15 #define PY_MICRO_VERSION 0 -#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_BETA -#define PY_RELEASE_SERIAL 1 +#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_ALPHA +#define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.14.0b1" +#define PY_VERSION "3.15.0a0" /*--end constants--*/ diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h.in index 9e70303868e5de..bbafaed808eecd 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h.in @@ -320,19 +320,19 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ the linking is explicitly handled */ # if defined(Py_GIL_DISABLED) # if defined(_DEBUG) -# pragma comment(lib,"python314t_d.lib") +# pragma comment(lib,"python315t_d.lib") # elif defined(Py_LIMITED_API) # pragma comment(lib,"python3t.lib") # else -# pragma comment(lib,"python314t.lib") +# pragma comment(lib,"python315t.lib") # endif /* _DEBUG */ # else /* Py_GIL_DISABLED */ # if defined(_DEBUG) -# pragma comment(lib,"python314_d.lib") +# pragma comment(lib,"python315_d.lib") # elif defined(Py_LIMITED_API) # pragma comment(lib,"python3.lib") # else -# pragma comment(lib,"python314.lib") +# pragma comment(lib,"python315.lib") # endif /* _DEBUG */ # endif /* Py_GIL_DISABLED */ # endif /* _MSC_VER && !Py_NO_LINK_LIB */ diff --git a/PCbuild/rt.bat b/PCbuild/rt.bat index c436215780fda0..f1e0607393405b 100644 --- a/PCbuild/rt.bat +++ b/PCbuild/rt.bat @@ -42,7 +42,7 @@ if "%~1"=="-O" (set dashO=-O) & shift & goto CheckOpts if "%~1"=="-q" (set qmode=yes) & shift & goto CheckOpts if "%~1"=="-d" (set suffix=_d) & shift & goto CheckOpts rem HACK: Need some way to infer the version number in this script -if "%~1"=="--disable-gil" (set pyname=python3.14t) & shift & goto CheckOpts +if "%~1"=="--disable-gil" (set pyname=python3.15t) & shift & goto CheckOpts if "%~1"=="-win32" (set prefix=%pcbuild%win32) & shift & goto CheckOpts if "%~1"=="-x64" (set prefix=%pcbuild%amd64) & shift & goto CheckOpts if "%~1"=="-amd64" (set prefix=%pcbuild%amd64) & shift & goto CheckOpts diff --git a/README.rst b/README.rst index 897d8995b63d9b..5bad7ea1c36dda 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -This is Python version 3.14.0 beta 1 -==================================== +This is Python version 3.15.0 alpha 0 +===================================== .. image:: https://github.com/python/cpython/actions/workflows/build.yml/badge.svg?branch=main&event=push :alt: CPython build status on GitHub Actions diff --git a/configure.ac b/configure.ac index a7b2f62579b0e9..233f19a82f87c8 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ dnl to regenerate the configure script. dnl # Set VERSION so we only need to edit in one place (i.e., here) -m4_define([PYTHON_VERSION], [3.14]) +m4_define([PYTHON_VERSION], [3.15]) AC_PREREQ([2.72]) From 77b14a6d58e527f915966446eb0866652a46feb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Wed, 7 May 2025 17:49:49 +0200 Subject: [PATCH 002/989] gh-69426: HTMLParser: only unescape properly terminated character entities in attribute values (GH-95215) According to the HTML5 spec, named character references in attribute values should only be processed if they are not followed by an ASCII alphanumeric, or an equals sign. https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state --- Lib/html/parser.py | 20 ++++++++- Lib/test/test_htmlparser.py | 43 +++++++++++++++---- ...2-07-24-20-56-32.gh-issue-69426.unccw7.rst | 3 ++ 3 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-07-24-20-56-32.gh-issue-69426.unccw7.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 13c95c34e505c8..0a1dd3b7d3bfd2 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -12,6 +12,7 @@ import _markupbase from html import unescape +from html.entities import html5 as html5_entities __all__ = ['HTMLParser'] @@ -23,6 +24,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') piclose = re.compile('>') @@ -57,6 +59,22 @@ # ') +# Character reference processing logic specific to attribute values +# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state +def _replace_attr_charref(match): + ref = match.group(0) + # Numeric / hex char refs must always be unescaped + if ref.startswith('&#'): + return unescape(ref) + # Named character / entity references must only be unescaped + # if they are an exact match, and they are not followed by an equals sign + if not ref.endswith('=') and ref[1:] in html5_entities: + return unescape(ref) + # Otherwise do not unescape + return ref + +def _unescape_attrvalue(s): + return attr_charref.sub(_replace_attr_charref, s) class HTMLParser(_markupbase.ParserBase): @@ -323,7 +341,7 @@ def parse_starttag(self, i): attrvalue[:1] == '"' == attrvalue[-1:]: attrvalue = attrvalue[1:-1] if attrvalue: - attrvalue = unescape(attrvalue) + attrvalue = _unescape_attrvalue(attrvalue) attrs.append((attrname.lower(), attrvalue)) k = m.end() diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index b42a611c62c0aa..4fdba06cf4cc92 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -348,18 +348,16 @@ def test_convert_charrefs(self): collector = lambda: EventCollectorCharrefs() self.assertTrue(collector().convert_charrefs) charrefs = ['"', '"', '"', '"', '"', '"'] - # check charrefs in the middle of the text/attributes - expected = [('starttag', 'a', [('href', 'foo"zar')]), - ('data', 'a"z'), ('endtag', 'a')] + # check charrefs in the middle of the text + expected = [('starttag', 'a', []), ('data', 'a"z'), ('endtag', 'a')] for charref in charrefs: - self._run_check('a{0}z'.format(charref), + self._run_check('a{0}z'.format(charref), expected, collector=collector()) - # check charrefs at the beginning/end of the text/attributes - expected = [('data', '"'), - ('starttag', 'a', [('x', '"'), ('y', '"X'), ('z', 'X"')]), + # check charrefs at the beginning/end of the text + expected = [('data', '"'), ('starttag', 'a', []), ('data', '"'), ('endtag', 'a'), ('data', '"')] for charref in charrefs: - self._run_check('{0}' + self._run_check('{0}' '{0}{0}'.format(charref), expected, collector=collector()) # check charrefs in - {% endif %} - - {% if builder != "htmlhelp" %} + {% endif %} + {% if pagename == 'whatsnew/changelog' and not embedded %} {% endif %} {% endif %} From 17d0fec702e43e27a9c3f37f8abe06a19b656bed Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 14 May 2025 20:16:07 +0300 Subject: [PATCH 143/989] Improve tests for str to Fraction conversion (GH-134010) --- Lib/test/test_fractions.py | 207 +++++++++++++++++-------------------- 1 file changed, 97 insertions(+), 110 deletions(-) diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 84faa63606439e..37de3ad380e633 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1,7 +1,7 @@ """Tests for Lib/fractions.py.""" from decimal import Decimal -from test.support import requires_IEEE_754 +from test.support import requires_IEEE_754, adjust_int_max_str_digits import math import numbers import operator @@ -395,12 +395,14 @@ class B(metaclass=M): def testFromString(self): self.assertEqual((5, 1), _components(F("5"))) + self.assertEqual((5, 1), _components(F("005"))) self.assertEqual((3, 2), _components(F("3/2"))) self.assertEqual((3, 2), _components(F("3 / 2"))) self.assertEqual((3, 2), _components(F(" \n +3/2"))) self.assertEqual((-3, 2), _components(F("-3/2 "))) - self.assertEqual((13, 2), _components(F(" 013/02 \n "))) + self.assertEqual((13, 2), _components(F(" 0013/002 \n "))) self.assertEqual((16, 5), _components(F(" 3.2 "))) + self.assertEqual((16, 5), _components(F("003.2"))) self.assertEqual((-16, 5), _components(F(" -3.2 "))) self.assertEqual((-3, 1), _components(F(" -3. "))) self.assertEqual((3, 5), _components(F(" .6 "))) @@ -419,116 +421,101 @@ def testFromString(self): self.assertRaisesMessage( ZeroDivisionError, "Fraction(3, 0)", F, "3/0") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '3/'", - F, "3/") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '/2'", - F, "/2") - self.assertRaisesMessage( - # Denominators don't need a sign. - ValueError, "Invalid literal for Fraction: '3/+2'", - F, "3/+2") - self.assertRaisesMessage( - # Imitate float's parsing. - ValueError, "Invalid literal for Fraction: '+ 3/2'", - F, "+ 3/2") - self.assertRaisesMessage( - # Avoid treating '.' as a regex special character. - ValueError, "Invalid literal for Fraction: '3a2'", - F, "3a2") - self.assertRaisesMessage( - # Don't accept combinations of decimals and rationals. - ValueError, "Invalid literal for Fraction: '3/7.2'", - F, "3/7.2") - self.assertRaisesMessage( - # Don't accept combinations of decimals and rationals. - ValueError, "Invalid literal for Fraction: '3.2/7'", - F, "3.2/7") - self.assertRaisesMessage( - # Allow 3. and .3, but not . - ValueError, "Invalid literal for Fraction: '.'", - F, ".") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '_'", - F, "_") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '_1'", - F, "_1") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1__2'", - F, "1__2") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '/_'", - F, "/_") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1_/'", - F, "1_/") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '_1/'", - F, "_1/") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1__2/'", - F, "1__2/") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/_'", - F, "1/_") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/_1'", - F, "1/_1") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/1__2'", - F, "1/1__2") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1._111'", - F, "1._111") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1.1__1'", - F, "1.1__1") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1.1e+_1'", - F, "1.1e+_1") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1.1e+1__1'", - F, "1.1e+1__1") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '123.dd'", - F, "123.dd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '123.5_dd'", - F, "123.5_dd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: 'dd.5'", - F, "dd.5") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '7_dd'", - F, "7_dd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/dd'", - F, "1/dd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/123_dd'", - F, "1/123_dd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '789edd'", - F, "789edd") - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '789e2_dd'", - F, "789e2_dd") + + def check_invalid(s): + msg = "Invalid literal for Fraction: " + repr(s) + self.assertRaisesMessage(ValueError, msg, F, s) + + check_invalid("3/") + check_invalid("/2") + # Denominators don't need a sign. + check_invalid("3/+2") + check_invalid("3/-2") + # Imitate float's parsing. + check_invalid("+ 3/2") + check_invalid("- 3/2") + # Avoid treating '.' as a regex special character. + check_invalid("3a2") + # Don't accept combinations of decimals and rationals. + check_invalid("3/7.2") + check_invalid("3.2/7") + # No space around dot. + check_invalid("3 .2") + check_invalid("3. 2") + # No space around e. + check_invalid("3.2 e1") + check_invalid("3.2e 1") + # Fractional part don't need a sign. + check_invalid("3.+2") + check_invalid("3.-2") + # Only accept base 10. + check_invalid("0x10") + check_invalid("0x10/1") + check_invalid("1/0x10") + check_invalid("0x10.") + check_invalid("0x10.1") + check_invalid("1.0x10") + check_invalid("1.0e0x10") + # Only accept decimal digits. + check_invalid("³") + check_invalid("³/2") + check_invalid("3/²") + check_invalid("³.2") + check_invalid("3.²") + check_invalid("3.2e²") + check_invalid("¼") + # Allow 3. and .3, but not . + check_invalid(".") + check_invalid("_") + check_invalid("_1") + check_invalid("1__2") + check_invalid("/_") + check_invalid("1_/") + check_invalid("_1/") + check_invalid("1__2/") + check_invalid("1/_") + check_invalid("1/_1") + check_invalid("1/1__2") + check_invalid("1._111") + check_invalid("1.1__1") + check_invalid("1.1e+_1") + check_invalid("1.1e+1__1") + check_invalid("123.dd") + check_invalid("123.5_dd") + check_invalid("dd.5") + check_invalid("7_dd") + check_invalid("1/dd") + check_invalid("1/123_dd") + check_invalid("789edd") + check_invalid("789e2_dd") # Test catastrophic backtracking. val = "9"*50 + "_" - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '" + val + "'", - F, val) - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1/" + val + "'", - F, "1/" + val) - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1." + val + "'", - F, "1." + val) - self.assertRaisesMessage( - ValueError, "Invalid literal for Fraction: '1.1+e" + val + "'", - F, "1.1+e" + val) + check_invalid(val) + check_invalid("1/" + val) + check_invalid("1." + val) + check_invalid("." + val) + check_invalid("1.1+e" + val) + check_invalid("1.1e" + val) + + def test_limit_int(self): + maxdigits = 5000 + with adjust_int_max_str_digits(maxdigits): + val = '1' * maxdigits + num = (10**maxdigits - 1)//9 + self.assertEqual((num, 1), _components(F(val))) + self.assertRaises(ValueError, F, val + '1') + self.assertEqual((num, 2), _components(F(val + '/2'))) + self.assertRaises(ValueError, F, val + '1/2') + self.assertEqual((1, num), _components(F('1/' + val))) + self.assertRaises(ValueError, F, '1/1' + val) + self.assertEqual(((10**(maxdigits+1) - 1)//9, 10**maxdigits), + _components(F('1.' + val))) + self.assertRaises(ValueError, F, '1.1' + val) + self.assertEqual((num, 10**maxdigits), _components(F('.' + val))) + self.assertRaises(ValueError, F, '.1' + val) + self.assertRaises(ValueError, F, '1.1e1' + val) + self.assertEqual((11, 10), _components(F('1.1e' + '0' * maxdigits))) + self.assertRaises(ValueError, F, '1.1e' + '0' * (maxdigits+1)) def testImmutable(self): r = F(7, 3) From ffaeb3dddf17b891301e6f172c87267f59784d00 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 15 May 2025 05:49:35 +1200 Subject: [PATCH 144/989] gh-127081: add critical sections to `dbm` objects (gh-132749) --- ...-04-21-01-05-14.gh-issue-127081.Egrpq7.rst | 2 + Modules/_dbmmodule.c | 76 ++++++++++++++--- Modules/_gdbmmodule.c | 85 ++++++++++++++++--- Modules/clinic/_dbmmodule.c.h | 37 ++++++-- Modules/clinic/_gdbmmodule.c.h | 70 ++++++++++++--- 5 files changed, 229 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-21-01-05-14.gh-issue-127081.Egrpq7.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-21-01-05-14.gh-issue-127081.Egrpq7.rst b/Misc/NEWS.d/next/Library/2025-04-21-01-05-14.gh-issue-127081.Egrpq7.rst new file mode 100644 index 00000000000000..30643673bf9a3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-21-01-05-14.gh-issue-127081.Egrpq7.rst @@ -0,0 +1,2 @@ +Fix libc thread safety issues with :mod:`dbm` by performing stateful +operations in critical sections. diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index cc65cbd98d71dc..0cd0f043de453d 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -69,6 +69,7 @@ typedef struct { #include "clinic/_dbmmodule.c.h" #define check_dbmobject_open(v, err) \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED((v)) \ if ((v)->di_dbm == NULL) { \ PyErr_SetString(err, "DBM object has already been closed"); \ return NULL; \ @@ -116,7 +117,7 @@ dbm_dealloc(PyObject *self) } static Py_ssize_t -dbm_length(PyObject *self) +dbm_length_lock_held(PyObject *self) { dbmobject *dp = dbmobject_CAST(self); _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -138,8 +139,18 @@ dbm_length(PyObject *self) return dp->di_size; } +static Py_ssize_t +dbm_length(PyObject *self) +{ + Py_ssize_t result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_length_lock_held(self); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -dbm_bool(PyObject *self) +dbm_bool_lock_held(PyObject *self) { dbmobject *dp = dbmobject_CAST(self); _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -170,8 +181,18 @@ dbm_bool(PyObject *self) return 1; } +static int +dbm_bool(PyObject *self) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_bool_lock_held(self); + Py_END_CRITICAL_SECTION(); + return result; +} + static PyObject * -dbm_subscript(PyObject *self, PyObject *key) +dbm_subscript_lock_held(PyObject *self, PyObject *key) { datum drec, krec; Py_ssize_t tmp_size; @@ -197,8 +218,18 @@ dbm_subscript(PyObject *self, PyObject *key) return PyBytes_FromStringAndSize(drec.dptr, drec.dsize); } +static PyObject * +dbm_subscript(PyObject *self, PyObject *key) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_subscript_lock_held(self, key); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) +dbm_ass_sub_lock_held(PyObject *self, PyObject *v, PyObject *w) { datum krec, drec; Py_ssize_t tmp_size; @@ -252,7 +283,18 @@ dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) return 0; } +static int +dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_ass_sub_lock_held(self, v, w); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _dbm.dbm.close Close the database. @@ -260,7 +302,7 @@ Close the database. static PyObject * _dbm_dbm_close_impl(dbmobject *self) -/*[clinic end generated code: output=c8dc5b6709600b86 input=046db72377d51be8]*/ +/*[clinic end generated code: output=c8dc5b6709600b86 input=4a94f79facbc28ca]*/ { if (self->di_dbm) { dbm_close(self->di_dbm); @@ -270,6 +312,7 @@ _dbm_dbm_close_impl(dbmobject *self) } /*[clinic input] +@critical_section _dbm.dbm.keys cls: defining_class @@ -279,7 +322,7 @@ Return a list of all keys in the database. static PyObject * _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=f2a593b3038e5996 input=d3706a28fc051097]*/ +/*[clinic end generated code: output=f2a593b3038e5996 input=6ddefeadf2a80156]*/ { PyObject *v, *item; datum key; @@ -310,7 +353,7 @@ _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls) } static int -dbm_contains(PyObject *self, PyObject *arg) +dbm_contains_lock_held(PyObject *self, PyObject *arg) { dbmobject *dp = dbmobject_CAST(self); datum key, val; @@ -343,7 +386,18 @@ dbm_contains(PyObject *self, PyObject *arg) return val.dptr != NULL; } +static int +dbm_contains(PyObject *self, PyObject *arg) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_contains_lock_held(self, arg); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _dbm.dbm.get cls: defining_class key: str(accept={str, robuffer}, zeroes=True) @@ -356,7 +410,7 @@ Return the value for key if present, otherwise default. static PyObject * _dbm_dbm_get_impl(dbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length, PyObject *default_value) -/*[clinic end generated code: output=b4e55f8b6d482bc4 input=66b993b8349fa8c1]*/ +/*[clinic end generated code: output=b4e55f8b6d482bc4 input=1d88a22bb5e55202]*/ { datum dbm_key, val; _dbm_state *state = PyType_GetModuleState(cls); @@ -373,6 +427,7 @@ _dbm_dbm_get_impl(dbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _dbm.dbm.setdefault cls: defining_class key: str(accept={str, robuffer}, zeroes=True) @@ -387,7 +442,7 @@ If key is not in the database, it is inserted with default as the value. static PyObject * _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length, PyObject *default_value) -/*[clinic end generated code: output=9c2f6ea6d0fb576c input=126a3ff15c5f8232]*/ +/*[clinic end generated code: output=9c2f6ea6d0fb576c input=c01510ef7571e13b]*/ { datum dbm_key, val; Py_ssize_t tmp_size; @@ -427,6 +482,7 @@ _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _dbm.dbm.clear cls: defining_class / @@ -436,7 +492,7 @@ Remove all items from the database. static PyObject * _dbm_dbm_clear_impl(dbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=8d126b9e1d01a434 input=43aa6ca1acb7f5f5]*/ +/*[clinic end generated code: output=8d126b9e1d01a434 input=a1aa5d99adfb9656]*/ { _dbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index ab2ebdba9249bf..9c402e20e513b9 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -81,6 +81,7 @@ typedef struct { #include "clinic/_gdbmmodule.c.h" #define check_gdbmobject_open(v, err) \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED((v)) \ if ((v)->di_dbm == NULL) { \ PyErr_SetString(err, "GDBM object has already been closed"); \ return NULL; \ @@ -142,7 +143,7 @@ gdbm_dealloc(PyObject *op) } static Py_ssize_t -gdbm_length(PyObject *op) +gdbm_length_lock_held(PyObject *op) { gdbmobject *dp = _gdbmobject_CAST(op); _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -188,8 +189,18 @@ gdbm_length(PyObject *op) return dp->di_size; } +static Py_ssize_t +gdbm_length(PyObject *op) +{ + Py_ssize_t result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_length_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -gdbm_bool(PyObject *op) +gdbm_bool_lock_held(PyObject *op) { gdbmobject *dp = _gdbmobject_CAST(op); _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -218,6 +229,16 @@ gdbm_bool(PyObject *op) return 1; } +static int +gdbm_bool(PyObject *op) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_bool_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + // Wrapper function for PyArg_Parse(o, "s#", &d.dptr, &d.size). // This function is needed to support PY_SSIZE_T_CLEAN. // Return 1 on success, same to PyArg_Parse(). @@ -240,7 +261,7 @@ parse_datum(PyObject *o, datum *d, const char *failmsg) } static PyObject * -gdbm_subscript(PyObject *op, PyObject *key) +gdbm_subscript_lock_held(PyObject *op, PyObject *key) { PyObject *v; datum drec, krec; @@ -265,6 +286,16 @@ gdbm_subscript(PyObject *op, PyObject *key) return v; } +static PyObject * +gdbm_subscript(PyObject *op, PyObject *key) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_subscript_lock_held(op, key); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] _gdbm.gdbm.get @@ -290,7 +321,7 @@ _gdbm_gdbm_get_impl(gdbmobject *self, PyObject *key, PyObject *default_value) } static int -gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) +gdbm_ass_sub_lock_held(PyObject *op, PyObject *v, PyObject *w) { datum krec, drec; const char *failmsg = "gdbm mappings have bytes or string indices only"; @@ -335,7 +366,18 @@ gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) return 0; } +static int +gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_ass_sub_lock_held(op, v, w); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _gdbm.gdbm.setdefault key: object @@ -348,7 +390,7 @@ Get value for key, or set it to default and return default if not present. static PyObject * _gdbm_gdbm_setdefault_impl(gdbmobject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=f3246e880509f142 input=0db46b69e9680171]*/ +/*[clinic end generated code: output=f3246e880509f142 input=854374cd81ab51b6]*/ { PyObject *res; @@ -363,6 +405,7 @@ _gdbm_gdbm_setdefault_impl(gdbmobject *self, PyObject *key, } /*[clinic input] +@critical_section _gdbm.gdbm.close Close the database. @@ -370,7 +413,7 @@ Close the database. static PyObject * _gdbm_gdbm_close_impl(gdbmobject *self) -/*[clinic end generated code: output=f5abb4d6bb9e52d5 input=0a203447379b45fd]*/ +/*[clinic end generated code: output=f5abb4d6bb9e52d5 input=56b604f4e77f533d]*/ { if (self->di_dbm) { gdbm_close(self->di_dbm); @@ -381,6 +424,7 @@ _gdbm_gdbm_close_impl(gdbmobject *self) /* XXX Should return a set or a set view */ /*[clinic input] +@critical_section _gdbm.gdbm.keys cls: defining_class @@ -390,7 +434,7 @@ Get a list of all keys in the database. static PyObject * _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=c24b824e81404755 input=1428b7c79703d7d5]*/ +/*[clinic end generated code: output=c24b824e81404755 input=785988b1ea8f77e0]*/ { PyObject *v, *item; datum key, nextkey; @@ -432,7 +476,7 @@ _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls) } static int -gdbm_contains(PyObject *self, PyObject *arg) +gdbm_contains_lock_held(PyObject *self, PyObject *arg) { gdbmobject *dp = (gdbmobject *)self; datum key; @@ -463,7 +507,18 @@ gdbm_contains(PyObject *self, PyObject *arg) return gdbm_exists(dp->di_dbm, key); } +static int +gdbm_contains(PyObject *self, PyObject *arg) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = gdbm_contains_lock_held(self, arg); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _gdbm.gdbm.firstkey cls: defining_class @@ -477,7 +532,7 @@ hash values, and won't be sorted by the key values. static PyObject * _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=139275e9c8b60827 input=ed8782a029a5d299]*/ +/*[clinic end generated code: output=139275e9c8b60827 input=aad5a7c886c542f5]*/ { PyObject *v; datum key; @@ -497,6 +552,7 @@ _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.nextkey cls: defining_class @@ -517,7 +573,7 @@ to create a list in memory that contains them all: static PyObject * _gdbm_gdbm_nextkey_impl(gdbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length) -/*[clinic end generated code: output=c81a69300ef41766 input=365e297bc0b3db48]*/ +/*[clinic end generated code: output=c81a69300ef41766 input=181f1130d5bfeb1e]*/ { PyObject *v; datum dbm_key, nextkey; @@ -539,6 +595,7 @@ _gdbm_gdbm_nextkey_impl(gdbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _gdbm.gdbm.reorganize cls: defining_class @@ -554,7 +611,7 @@ kept and reused as new (key,value) pairs are added. static PyObject * _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=d77c69e8e3dd644a input=e1359faeef844e46]*/ +/*[clinic end generated code: output=d77c69e8e3dd644a input=3e3ca0d2ea787861]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -573,6 +630,7 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.sync cls: defining_class @@ -585,7 +643,7 @@ any unwritten data to be written to the disk. static PyObject * _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=bb680a2035c3f592 input=3d749235f79b6f2a]*/ +/*[clinic end generated code: output=bb680a2035c3f592 input=6054385b071d238a]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -595,6 +653,7 @@ _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.clear cls: defining_class / @@ -604,7 +663,7 @@ Remove all items from the database. static PyObject * _gdbm_gdbm_clear_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=673577c573318661 input=34136d52fcdd4210]*/ +/*[clinic end generated code: output=673577c573318661 input=b17467adfe62f23d]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h index 5e503194408776..091ce9edc43d4b 100644 --- a/Modules/clinic/_dbmmodule.c.h +++ b/Modules/clinic/_dbmmodule.c.h @@ -5,6 +5,7 @@ preserve #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_dbm_dbm_close__doc__, @@ -22,7 +23,13 @@ _dbm_dbm_close_impl(dbmobject *self); static PyObject * _dbm_dbm_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _dbm_dbm_close_impl((dbmobject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_close_impl((dbmobject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_dbm_dbm_keys__doc__, @@ -40,11 +47,18 @@ _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls); static PyObject * _dbm_dbm_keys(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "keys() takes no arguments"); - return NULL; + goto exit; } - return _dbm_dbm_keys_impl((dbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_keys_impl((dbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_dbm_dbm_get__doc__, @@ -85,7 +99,9 @@ _dbm_dbm_get(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_ &key, &key_length, &default_value)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _dbm_dbm_get_impl((dbmobject *)self, cls, key, key_length, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -131,7 +147,9 @@ _dbm_dbm_setdefault(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py &key, &key_length, &default_value)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _dbm_dbm_setdefault_impl((dbmobject *)self, cls, key, key_length, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -152,11 +170,18 @@ _dbm_dbm_clear_impl(dbmobject *self, PyTypeObject *cls); static PyObject * _dbm_dbm_clear(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "clear() takes no arguments"); - return NULL; + goto exit; } - return _dbm_dbm_clear_impl((dbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_clear_impl((dbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(dbmopen__doc__, @@ -221,4 +246,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=3b456118f231b160 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=279511ea7cda38dd input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h index 00950f18e53541..6fd6aa3da50335 100644 --- a/Modules/clinic/_gdbmmodule.c.h +++ b/Modules/clinic/_gdbmmodule.c.h @@ -5,6 +5,7 @@ preserve #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_gdbm_gdbm_get__doc__, @@ -70,7 +71,9 @@ _gdbm_gdbm_setdefault(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } default_value = args[1]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _gdbm_gdbm_setdefault_impl((gdbmobject *)self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -91,7 +94,13 @@ _gdbm_gdbm_close_impl(gdbmobject *self); static PyObject * _gdbm_gdbm_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _gdbm_gdbm_close_impl((gdbmobject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_close_impl((gdbmobject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_keys__doc__, @@ -109,11 +118,18 @@ _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_keys(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "keys() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_keys_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_keys_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_firstkey__doc__, @@ -135,11 +151,18 @@ _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_firstkey(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "firstkey() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_firstkey_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_firstkey_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_nextkey__doc__, @@ -187,7 +210,9 @@ _gdbm_gdbm_nextkey(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ &key, &key_length)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _gdbm_gdbm_nextkey_impl((gdbmobject *)self, cls, key, key_length); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -214,11 +239,18 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_reorganize(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "reorganize() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_reorganize_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_reorganize_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_sync__doc__, @@ -239,11 +271,18 @@ _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_sync(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "sync() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_sync_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_sync_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_clear__doc__, @@ -261,11 +300,18 @@ _gdbm_gdbm_clear_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_clear(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "clear() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_clear_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_clear_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(dbmopen__doc__, @@ -343,4 +389,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=d974cb39e4ee5d67 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8bca34ce9d4493dd input=a9049054013a1b77]*/ From e123a1d09bcb75aae0c53d9d732de5d9a1252efe Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 14 May 2025 20:53:51 +0300 Subject: [PATCH 145/989] Test also error messages in test_limit_int. (GH-134018) --- Lib/test/test_fractions.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 37de3ad380e633..96b3f30519459b 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -500,22 +500,23 @@ def check_invalid(s): def test_limit_int(self): maxdigits = 5000 with adjust_int_max_str_digits(maxdigits): + msg = 'Exceeds the limit' val = '1' * maxdigits num = (10**maxdigits - 1)//9 self.assertEqual((num, 1), _components(F(val))) - self.assertRaises(ValueError, F, val + '1') + self.assertRaisesRegex(ValueError, msg, F, val + '1') self.assertEqual((num, 2), _components(F(val + '/2'))) - self.assertRaises(ValueError, F, val + '1/2') + self.assertRaisesRegex(ValueError, msg, F, val + '1/2') self.assertEqual((1, num), _components(F('1/' + val))) - self.assertRaises(ValueError, F, '1/1' + val) + self.assertRaisesRegex(ValueError, msg, F, '1/1' + val) self.assertEqual(((10**(maxdigits+1) - 1)//9, 10**maxdigits), _components(F('1.' + val))) - self.assertRaises(ValueError, F, '1.1' + val) + self.assertRaisesRegex(ValueError, msg, F, '1.1' + val) self.assertEqual((num, 10**maxdigits), _components(F('.' + val))) - self.assertRaises(ValueError, F, '.1' + val) - self.assertRaises(ValueError, F, '1.1e1' + val) + self.assertRaisesRegex(ValueError, msg, F, '.1' + val) + self.assertRaisesRegex(ValueError, msg, F, '1.1e1' + val) self.assertEqual((11, 10), _components(F('1.1e' + '0' * maxdigits))) - self.assertRaises(ValueError, F, '1.1e' + '0' * (maxdigits+1)) + self.assertRaisesRegex(ValueError, msg, F, '1.1e' + '0' * (maxdigits+1)) def testImmutable(self): r = F(7, 3) From c3a1da5b9397867e6b3169dd17cb33ef2898da4a Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 15 May 2025 09:11:46 +0300 Subject: [PATCH 146/989] gh-133970: Make PEP750 types generic (#133976) --- Lib/test/test_genericalias.py | 6 +++++- .../Library/2025-05-13-18-54-56.gh-issue-133970.6G-Oi6.rst | 2 ++ Objects/interpolationobject.c | 2 ++ Objects/templateobject.c | 2 ++ 4 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-13-18-54-56.gh-issue-133970.6G-Oi6.rst diff --git a/Lib/test/test_genericalias.py b/Lib/test/test_genericalias.py index 8d21ded45014ba..ea0dc241e39475 100644 --- a/Lib/test/test_genericalias.py +++ b/Lib/test/test_genericalias.py @@ -61,6 +61,7 @@ from tkinter import Event except ImportError: Event = None +from string.templatelib import Template, Interpolation from typing import TypeVar T = TypeVar('T') @@ -139,7 +140,10 @@ class BaseTest(unittest.TestCase): DictReader, DictWriter, array, staticmethod, - classmethod] + classmethod, + Template, + Interpolation, + ] if ctypes is not None: generic_types.extend((ctypes.Array, ctypes.LibraryLoader, ctypes.py_object)) if ValueProxy is not None: diff --git a/Misc/NEWS.d/next/Library/2025-05-13-18-54-56.gh-issue-133970.6G-Oi6.rst b/Misc/NEWS.d/next/Library/2025-05-13-18-54-56.gh-issue-133970.6G-Oi6.rst new file mode 100644 index 00000000000000..ddf456d3939056 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-13-18-54-56.gh-issue-133970.6G-Oi6.rst @@ -0,0 +1,2 @@ +Make :class:`!string.templatelib.Template` and +:class:`!string.templatelib.Interpolation` generic. diff --git a/Objects/interpolationobject.c b/Objects/interpolationobject.c index aaea3b8c0670c9..a5d407a7b0e296 100644 --- a/Objects/interpolationobject.c +++ b/Objects/interpolationobject.c @@ -137,6 +137,8 @@ interpolation_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef interpolation_methods[] = { {"__reduce__", interpolation_reduce, METH_NOARGS, PyDoc_STR("__reduce__() -> (cls, state)")}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; diff --git a/Objects/templateobject.c b/Objects/templateobject.c index 7d356980b56cbb..06cb19e0b6d056 100644 --- a/Objects/templateobject.c +++ b/Objects/templateobject.c @@ -444,6 +444,8 @@ template_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef template_methods[] = { {"__reduce__", template_reduce, METH_NOARGS, NULL}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; From 7eaa09739059aaac4812395f8d6bb586af8eadcc Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 15 May 2025 12:13:03 +0300 Subject: [PATCH 147/989] gh-133403: Check `Tools/build/deepfreeze.py` with mypy (#133802) --- .github/workflows/mypy.yml | 2 ++ Tools/build/.ruff.toml | 2 +- Tools/build/deepfreeze.py | 51 ++++++++++++++++++++++++-------------- Tools/build/mypy.ini | 4 ++- Tools/build/umarshal.py | 11 ++++---- 5 files changed, 45 insertions(+), 25 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index c133193d30c4ed..95133c1338b682 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -14,10 +14,12 @@ on: - "Lib/tomllib/**" - "Misc/mypy/**" - "Tools/build/compute-changes.py" + - "Tools/build/deepfreeze.py" - "Tools/build/generate_sbom.py" - "Tools/build/generate-build-details.py" - "Tools/build/verify_ensurepip_wheels.py" - "Tools/build/update_file.py" + - "Tools/build/umarshal.py" - "Tools/cases_generator/**" - "Tools/clinic/**" - "Tools/jit/**" diff --git a/Tools/build/.ruff.toml b/Tools/build/.ruff.toml index fa7689d45dbcb7..dcbf2936290f12 100644 --- a/Tools/build/.ruff.toml +++ b/Tools/build/.ruff.toml @@ -1,7 +1,7 @@ extend = "../../.ruff.toml" # Inherit the project-wide settings [per-file-target-version] -"deepfreeze.py" = "py310" +"deepfreeze.py" = "py311" # requires `code.co_exceptiontable` "stable_abi.py" = "py311" # requires 'tomllib' [format] diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index 23f58447937976..2b9f03aebb6d7e 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -2,9 +2,12 @@ The script may be executed by _bootstrap_python interpreter. Shared library extension modules are not available in that case. -On Windows, and in cross-compilation cases, it is executed -by Python 3.10, and 3.11 features are not available. +Requires 3.11+ to be executed, +because relies on `code.co_qualname` and `code.co_exceptiontable`. """ + +from __future__ import annotations + import argparse import builtins import collections @@ -13,10 +16,14 @@ import re import time import types -from typing import TextIO import umarshal +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, TextIO + ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) verbose = False @@ -45,8 +52,8 @@ def make_string_literal(b: bytes) -> str: next_code_version = 1 -def get_localsplus(code: types.CodeType): - a = collections.defaultdict(int) +def get_localsplus(code: types.CodeType) -> tuple[tuple[str, ...], bytes]: + a: collections.defaultdict[str, int] = collections.defaultdict(int) for name in code.co_varnames: a[name] |= CO_FAST_LOCAL for name in code.co_cellvars: @@ -136,7 +143,7 @@ def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]: return identifiers, strings @contextlib.contextmanager - def indent(self) -> None: + def indent(self) -> Iterator[None]: save_level = self.level try: self.level += 1 @@ -148,7 +155,7 @@ def write(self, arg: str) -> None: self.file.writelines((" "*self.level, arg, "\n")) @contextlib.contextmanager - def block(self, prefix: str, suffix: str = "") -> None: + def block(self, prefix: str, suffix: str = "") -> Iterator[None]: self.write(prefix + " {") with self.indent(): yield @@ -250,9 +257,17 @@ def generate_code(self, name: str, code: types.CodeType) -> str: co_names = self.generate(name + "_names", code.co_names) co_filename = self.generate(name + "_filename", code.co_filename) co_name = self.generate(name + "_name", code.co_name) - co_qualname = self.generate(name + "_qualname", code.co_qualname) co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) + # We use 3.10 for type checking, but this module requires 3.11 + # TODO: bump python version for this script. + co_qualname = self.generate( + name + "_qualname", + code.co_qualname, # type: ignore[attr-defined] + ) + co_exceptiontable = self.generate( + name + "_exceptiontable", + code.co_exceptiontable, # type: ignore[attr-defined] + ) # These fields are not directly accessible localsplusnames, localspluskinds = get_localsplus(code) co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) @@ -379,13 +394,13 @@ def generate_complex(self, name: str, z: complex) -> str: self.write(f".cval = {{ {z.real}, {z.imag} }},") return f"&{name}.ob_base" - def generate_frozenset(self, name: str, fs: frozenset[object]) -> str: + def generate_frozenset(self, name: str, fs: frozenset[Any]) -> str: try: - fs = sorted(fs) + fs_sorted = sorted(fs) except TypeError: # frozen set with incompatible types, fallback to repr() - fs = sorted(fs, key=repr) - ret = self.generate_tuple(name, tuple(fs)) + fs_sorted = sorted(fs, key=repr) + ret = self.generate_tuple(name, tuple(fs_sorted)) self.write("// TODO: The above tuple should be a frozenset") return ret @@ -402,7 +417,7 @@ def generate(self, name: str, obj: object) -> str: # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") return self.cache[key] self.misses += 1 - if isinstance(obj, (types.CodeType, umarshal.Code)) : + if isinstance(obj, types.CodeType) : val = self.generate_code(name, obj) elif isinstance(obj, tuple): val = self.generate_tuple(name, obj) @@ -458,7 +473,7 @@ def decode_frozen_data(source: str) -> types.CodeType: if re.match(FROZEN_DATA_LINE, line): values.extend([int(x) for x in line.split(",") if x.strip()]) data = bytes(values) - return umarshal.loads(data) + return umarshal.loads(data) # type: ignore[no-any-return] def generate(args: list[str], output: TextIO) -> None: @@ -494,12 +509,12 @@ def generate(args: list[str], output: TextIO) -> None: help="Input file and module name (required) in file:modname format") @contextlib.contextmanager -def report_time(label: str): - t0 = time.time() +def report_time(label: str) -> Iterator[None]: + t0 = time.perf_counter() try: yield finally: - t1 = time.time() + t1 = time.perf_counter() if verbose: print(f"{label}: {t1-t0:.3f} sec") diff --git a/Tools/build/mypy.ini b/Tools/build/mypy.ini index 62d7e150fd8521..123dc895f90a1f 100644 --- a/Tools/build/mypy.ini +++ b/Tools/build/mypy.ini @@ -4,10 +4,12 @@ # .github/workflows/mypy.yml files = Tools/build/compute-changes.py, + Tools/build/deepfreeze.py, Tools/build/generate-build-details.py, Tools/build/generate_sbom.py, Tools/build/verify_ensurepip_wheels.py, - Tools/build/update_file.py + Tools/build/update_file.py, + Tools/build/umarshal.py pretty = True diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index 679fa7caf9f931..865cffc2440122 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -145,12 +145,12 @@ def r_PyLong(self) -> int: def r_float_bin(self) -> float: buf = self.r_string(8) import struct # Lazy import to avoid breaking UNIX build - return struct.unpack("d", buf)[0] + return struct.unpack("d", buf)[0] # type: ignore[no-any-return] def r_float_str(self) -> float: n = self.r_byte() buf = self.r_string(n) - return ast.literal_eval(buf.decode("ascii")) + return ast.literal_eval(buf.decode("ascii")) # type: ignore[no-any-return] def r_ref_reserve(self, flag: int) -> int: if flag: @@ -306,7 +306,7 @@ def loads(data: bytes) -> Any: return r.r_object() -def main(): +def main() -> None: # Test import marshal import pprint @@ -314,8 +314,9 @@ def main(): data = marshal.dumps(sample) retval = loads(data) assert retval == sample, retval - sample = main.__code__ - data = marshal.dumps(sample) + + sample2 = main.__code__ + data = marshal.dumps(sample2) retval = loads(data) assert isinstance(retval, Code), retval pprint.pprint(retval.__dict__) From 54a6875adbc9091909b1473c49595c0cc84dc438 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 15 May 2025 12:39:42 +0300 Subject: [PATCH 148/989] =?UTF-8?q?gh-119535:=203.15=20minus=20=CF=80=20(#?= =?UTF-8?q?134037)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "gh-119535: Support 𝜋thon in Python 3.14 venvs (#125035)" This reverts commit fcef3fc9a593e2aa868d23cf2d91c57d8bf60ac6. * Revert "gh-119535: python𝜋 (#119536)" This reverts commit 3fc673e97dafb8a73ee99937cf2bf0b849b1f418. --- Lib/venv/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index 15e15b7a5184c2..dc9c5991df7e1c 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -313,11 +313,8 @@ def setup_python(self, context): copier(context.executable, path) if not os.path.islink(path): os.chmod(path, 0o755) - - suffixes = ['python', 'python3', f'python3.{sys.version_info[1]}'] - if sys.version_info[:2] == (3, 14): - suffixes.append('𝜋thon') - for suffix in suffixes: + for suffix in ('python', 'python3', + f'python3.{sys.version_info[1]}'): path = os.path.join(binpath, suffix) if not os.path.exists(path): # Issue 18807: make copies if From 43410953c26153b48554218cbe35ba5f1167b08e Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Thu, 15 May 2025 12:59:11 +0200 Subject: [PATCH 149/989] gh-133572: Avoid using NTSTATUS on unsupported WinAPI partitions (GH-133573) --- ...-05-07-11-45-30.gh-issue-133572.Xc2zxH.rst | 1 + Modules/mmapmodule.c | 26 ++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2025-05-07-11-45-30.gh-issue-133572.Xc2zxH.rst diff --git a/Misc/NEWS.d/next/Windows/2025-05-07-11-45-30.gh-issue-133572.Xc2zxH.rst b/Misc/NEWS.d/next/Windows/2025-05-07-11-45-30.gh-issue-133572.Xc2zxH.rst new file mode 100644 index 00000000000000..993eceab0b79c2 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2025-05-07-11-45-30.gh-issue-133572.Xc2zxH.rst @@ -0,0 +1 @@ +Avoid LsaNtStatus to WinError conversion on unsupported WinAPI partitions. diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 6a385562845849..7c4eb05488eb33 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -290,6 +290,24 @@ filter_page_exception_method(mmap_object *self, EXCEPTION_POINTERS *ptrs, } return EXCEPTION_CONTINUE_SEARCH; } + +static void +_PyErr_SetFromNTSTATUS(ULONG status) +{ +#if defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM) + PyErr_SetFromWindowsErr(LsaNtStatusToWinError((NTSTATUS)status)); +#else + if (status & 0x80000000) { + // HRESULT-shaped codes are supported by PyErr_SetFromWindowsErr + PyErr_SetFromWindowsErr((int)status); + } + else { + // No mapping for NTSTATUS values, so just return it for diagnostic purposes + // If we provide it as winerror it could incorrectly change the type of the exception. + PyErr_Format(PyExc_OSError, "Operating system error NTSTATUS=0x%08lX", status); + } +#endif +} #endif #if defined(MS_WINDOWS) && !defined(DONT_USE_SEH) @@ -303,9 +321,7 @@ do { \ assert(record.ExceptionCode == EXCEPTION_IN_PAGE_ERROR || \ record.ExceptionCode == EXCEPTION_ACCESS_VIOLATION); \ if (record.ExceptionCode == EXCEPTION_IN_PAGE_ERROR) { \ - NTSTATUS status = (NTSTATUS) record.ExceptionInformation[2]; \ - ULONG code = LsaNtStatusToWinError(status); \ - PyErr_SetFromWindowsErr(code); \ + _PyErr_SetFromNTSTATUS((ULONG)record.ExceptionInformation[2]); \ } \ else if (record.ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { \ PyErr_SetFromWindowsErr(ERROR_NOACCESS); \ @@ -332,9 +348,7 @@ do { \ assert(record.ExceptionCode == EXCEPTION_IN_PAGE_ERROR || \ record.ExceptionCode == EXCEPTION_ACCESS_VIOLATION); \ if (record.ExceptionCode == EXCEPTION_IN_PAGE_ERROR) { \ - NTSTATUS status = (NTSTATUS) record.ExceptionInformation[2]; \ - ULONG code = LsaNtStatusToWinError(status); \ - PyErr_SetFromWindowsErr(code); \ + _PyErr_SetFromNTSTATUS((ULONG)record.ExceptionInformation[2]); \ } \ else if (record.ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { \ PyErr_SetFromWindowsErr(ERROR_NOACCESS); \ From 1c4b34c6cb851334c813647a2a7db77ecee20b7a Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Thu, 15 May 2025 13:50:46 +0200 Subject: [PATCH 150/989] gh-134041: Make _winapi functions compatible with non-desktop API partitions (GH-134042) --- Modules/_winapi.c | 12 ++++++++++++ Modules/clinic/_winapi.c.h | 10 +++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 02817e09b936f3..0dea8d5ef172f9 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1573,6 +1573,7 @@ static PyObject * _winapi_GetLongPathName_impl(PyObject *module, LPCWSTR path) /*[clinic end generated code: output=c4774b080275a2d0 input=9872e211e3a4a88f]*/ { +#if defined(MS_WINDOWS_APP) || defined(MS_WINDOWS_SYSTEM) DWORD cchBuffer; PyObject *result = NULL; @@ -1596,6 +1597,9 @@ _winapi_GetLongPathName_impl(PyObject *module, LPCWSTR path) PyErr_SetFromWindowsErr(0); } return result; +#else + return PyUnicode_FromWideChar(path, -1); +#endif } /*[clinic input] @@ -1632,6 +1636,8 @@ _winapi_GetModuleFileName_impl(PyObject *module, HMODULE module_handle) return PyUnicode_FromWideChar(filename, wcslen(filename)); } +#if defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM) + /*[clinic input] _winapi.GetShortPathName @@ -1674,6 +1680,8 @@ _winapi_GetShortPathName_impl(PyObject *module, LPCWSTR path) return result; } +#endif /* MS_WINDOWS_DESKTOP || MS_WINDOWS_SYSTEM */ + /*[clinic input] _winapi.GetStdHandle -> HANDLE @@ -2883,6 +2891,7 @@ _winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module, LPCWSTR exe_name) /*[clinic end generated code: output=a65ec879502b58fc input=972aac88a1ec2f00]*/ { +#if defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM) BOOL result; Py_BEGIN_ALLOW_THREADS @@ -2890,6 +2899,9 @@ _winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module, Py_END_ALLOW_THREADS return result; +#else + return TRUE; +#endif } diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index f8b623fca082d2..b0fc1f1a89b760 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -857,6 +857,8 @@ _winapi_GetModuleFileName(PyObject *module, PyObject *arg) return return_value; } +#if (defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM)) + PyDoc_STRVAR(_winapi_GetShortPathName__doc__, "GetShortPathName($module, /, path)\n" "--\n" @@ -930,6 +932,8 @@ _winapi_GetShortPathName(PyObject *module, PyObject *const *args, Py_ssize_t nar return return_value; } +#endif /* (defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM)) */ + PyDoc_STRVAR(_winapi_GetStdHandle__doc__, "GetStdHandle($module, std_handle, /)\n" "--\n" @@ -2161,4 +2165,8 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO return return_value; } -/*[clinic end generated code: output=6cd07628af447d0a input=a9049054013a1b77]*/ + +#ifndef _WINAPI_GETSHORTPATHNAME_METHODDEF + #define _WINAPI_GETSHORTPATHNAME_METHODDEF +#endif /* !defined(_WINAPI_GETSHORTPATHNAME_METHODDEF) */ +/*[clinic end generated code: output=ede63eaaf63aa7e6 input=a9049054013a1b77]*/ From 74c4e35ff1b1b020d530a239f8ddadf318472a1a Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Thu, 15 May 2025 13:56:50 +0200 Subject: [PATCH 151/989] Let PyUnicode_FromWideChar calculate the input length (GH-134045) --- Modules/_ctypes/callproc.c | 2 +- Modules/_ctypes/cfield.c | 2 +- Modules/_winapi.c | 2 +- Modules/main.c | 6 +++--- Modules/posixmodule.c | 8 ++++---- PC/winreg.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 404178ca623a93..9f5ad9f57b75e2 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1389,7 +1389,7 @@ static PyObject *format_error(PyObject *self, PyObject *args) code = GetLastError(); lpMsgBuf = FormatError(code); if (lpMsgBuf) { - result = PyUnicode_FromWideChar(lpMsgBuf, wcslen(lpMsgBuf)); + result = PyUnicode_FromWideChar(lpMsgBuf, -1); LocalFree(lpMsgBuf); } else { result = PyUnicode_FromString(""); diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 86bcc805360a3f..163b92642615e5 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -1253,7 +1253,7 @@ Z_get(void *ptr, Py_ssize_t size) wchar_t *p; p = *(wchar_t **)ptr; if (p) { - return PyUnicode_FromWideChar(p, wcslen(p)); + return PyUnicode_FromWideChar(p, -1); } else { Py_RETURN_NONE; } diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 0dea8d5ef172f9..044505fab6216c 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1633,7 +1633,7 @@ _winapi_GetModuleFileName_impl(PyObject *module, HMODULE module_handle) if (! result) return PyErr_SetFromWindowsErr(GetLastError()); - return PyUnicode_FromWideChar(filename, wcslen(filename)); + return PyUnicode_FromWideChar(filename, -1); } #if defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_SYSTEM) diff --git a/Modules/main.c b/Modules/main.c index ea1239ecc57f00..2be194bdadf7d0 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -128,7 +128,7 @@ pymain_get_importer(const wchar_t *filename, PyObject **importer_p, int *exitcod { PyObject *sys_path0 = NULL, *importer; - sys_path0 = PyUnicode_FromWideChar(filename, wcslen(filename)); + sys_path0 = PyUnicode_FromWideChar(filename, -1); if (sys_path0 == NULL) { goto error; } @@ -328,7 +328,7 @@ pymain_run_module(const wchar_t *modname, int set_argv0) fprintf(stderr, "Could not import runpy._run_module_as_main\n"); return pymain_exit_err_print(); } - module = PyUnicode_FromWideChar(modname, wcslen(modname)); + module = PyUnicode_FromWideChar(modname, -1); if (module == NULL) { fprintf(stderr, "Could not convert module name to unicode\n"); Py_DECREF(runmodule); @@ -439,7 +439,7 @@ pymain_run_startup(PyConfig *config, int *exitcode) if (env == NULL || env[0] == L'\0') { return 0; } - startup = PyUnicode_FromWideChar(env, wcslen(env)); + startup = PyUnicode_FromWideChar(env, -1); if (startup == NULL) { goto error; } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bb3bfe802b93e8..588894adeac811 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1782,7 +1782,7 @@ convertenviron(void) return NULL; } #ifdef MS_WINDOWS - v = PyUnicode_FromWideChar(p+1, wcslen(p+1)); + v = PyUnicode_FromWideChar(p+1, -1); #else v = PyBytes_FromStringAndSize(p+1, strlen(p+1)); #endif @@ -5052,7 +5052,7 @@ os__getfullpathname_impl(PyObject *module, path_t *path) return PyErr_NoMemory(); } - PyObject *str = PyUnicode_FromWideChar(abspath, wcslen(abspath)); + PyObject *str = PyUnicode_FromWideChar(abspath, -1); PyMem_RawFree(abspath); if (str == NULL) { return NULL; @@ -5168,7 +5168,7 @@ os__findfirstfile_impl(PyObject *module, path_t *path) } wRealFileName = wFileData.cFileName; - result = PyUnicode_FromWideChar(wRealFileName, wcslen(wRealFileName)); + result = PyUnicode_FromWideChar(wRealFileName, -1); FindClose(hFindFile); return result; } @@ -5212,7 +5212,7 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) result = win32_error_object("_getvolumepathname", path->object); goto exit; } - result = PyUnicode_FromWideChar(mountpath, wcslen(mountpath)); + result = PyUnicode_FromWideChar(mountpath, -1); if (PyBytes_Check(path->object)) Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); diff --git a/PC/winreg.c b/PC/winreg.c index c0de5c1353a349..a8e2e4cc10c882 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -1290,7 +1290,7 @@ winreg_ExpandEnvironmentStrings_impl(PyObject *module, const wchar_t *string) return PyErr_SetFromWindowsErrWithFunction(retValueSize, "ExpandEnvironmentStrings"); } - o = PyUnicode_FromWideChar(retValue, wcslen(retValue)); + o = PyUnicode_FromWideChar(retValue, -1); PyMem_Free(retValue); return o; } From d029a1a1cba515510f31c9e88b7c2a6b7ebee2a7 Mon Sep 17 00:00:00 2001 From: Semyon Moroz Date: Thu, 15 May 2025 13:07:34 +0000 Subject: [PATCH 152/989] gh-77065: add missing parameter `echo_char` in `getpass.fallback_getpass` (#133849) --- Lib/getpass.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/getpass.py b/Lib/getpass.py index f571425e54178a..a4dbd2ea6502d4 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -132,14 +132,15 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): return pw -def fallback_getpass(prompt='Password: ', stream=None): +def fallback_getpass(prompt='Password: ', stream=None, *, echo_char=None): + _check_echo_char(echo_char) import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: stream = sys.stderr print("Warning: Password input may be echoed.", file=stream) - return _raw_input(prompt, stream) + return _raw_input(prompt, stream, echo_char=echo_char) def _check_echo_char(echo_char): From 319acf3d6cb05f5429422fda5c15bbd64fe9bd28 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 15 May 2025 16:10:56 +0300 Subject: [PATCH 153/989] gh-133410: Fix PR detection in build workflow (#133671) --- .github/workflows/reusable-context.yml | 1 + Tools/build/compute-changes.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 73dc254edc5fbc..d2668ddcac1a3d 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -97,6 +97,7 @@ jobs: run: python Tools/build/compute-changes.py env: GITHUB_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GITHUB_EVENT_NAME: ${{ github.event_name }} CCF_TARGET_REF: ${{ github.base_ref || github.event.repository.default_branch }} CCF_HEAD_REF: ${{ github.event.pull_request.head.sha || github.sha }} diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index cfdd55fd1925fd..b5993d29b92972 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -57,7 +57,7 @@ class Outputs: def compute_changes() -> None: target_branch, head_ref = git_refs() - if target_branch and head_ref: + if os.environ.get("GITHUB_EVENT_NAME", "") == "pull_request": # Getting changed files only makes sense on a pull request files = get_changed_files(target_branch, head_ref) outputs = process_changed_files(files) From 3f61ea3add0d432345a58f4be2229074f9f191c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 15 May 2025 16:52:07 +0200 Subject: [PATCH 154/989] gh-133873: remove deprecated mark interface for `wave.Wave_{read,write}` objects (#133874) --- Doc/deprecations/pending-removal-in-3.15.rst | 3 +- Doc/library/wave.rst | 20 ------------- Doc/whatsnew/3.13.rst | 3 +- Doc/whatsnew/3.15.rst | 9 ++++++ Lib/test/test_wave.py | 26 ----------------- Lib/wave.py | 29 ------------------- ...-05-11-10-28-11.gh-issue-133873.H03nov.rst | 3 ++ 7 files changed, 14 insertions(+), 79 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-11-10-28-11.gh-issue-133873.H03nov.rst diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index c80588b27b635a..8953aedf989869 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -99,8 +99,7 @@ Pending removal in Python 3.15 * :mod:`wave`: - * The :meth:`~wave.Wave_read.getmark`, :meth:`!setmark`, - and :meth:`~wave.Wave_read.getmarkers` methods of + * The ``getmark()``, ``setmark()`` and ``getmarkers()`` methods of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes have been deprecated since Python 3.13. diff --git a/Doc/library/wave.rst b/Doc/library/wave.rst index 36c2bde87fb8fb..a3f5bfd5e2f99c 100644 --- a/Doc/library/wave.rst +++ b/Doc/library/wave.rst @@ -123,26 +123,6 @@ Wave_read Objects Rewind the file pointer to the beginning of the audio stream. - The following two methods are defined for compatibility with the old :mod:`!aifc` - module, and don't do anything interesting. - - - .. method:: getmarkers() - - Returns ``None``. - - .. deprecated-removed:: 3.13 3.15 - The method only existed for compatibility with the :mod:`!aifc` module - which has been removed in Python 3.13. - - - .. method:: getmark(id) - - Raise an error. - - .. deprecated-removed:: 3.13 3.15 - The method only existed for compatibility with the :mod:`!aifc` module - which has been removed in Python 3.13. The following two methods define a term "position" which is compatible between them, and is otherwise implementation dependent. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index b3530f75b2fa40..7ae184058111d8 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1995,8 +1995,7 @@ New Deprecations * :mod:`wave`: - * Deprecate the :meth:`~wave.Wave_read.getmark`, :meth:`!setmark`, - and :meth:`~wave.Wave_read.getmarkers` methods of + * Deprecate the ``getmark()``, ``setmark()`` and ``getmarkers()`` methods of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes, to be removed in Python 3.15. (Contributed by Victor Stinner in :gh:`105096`.) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8cf5238e6cc49a..14d96420f698fa 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -144,6 +144,15 @@ typing (Contributed by Bénédikt Tran in :gh:`133823`.) +wave +---- + +* Removed the ``getmark()``, ``setmark()`` and ``getmarkers()`` methods + of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes, + which were deprecated since Python 3.13. + (Contributed by Bénédikt Tran in :gh:`133873`.) + + Porting to Python 3.15 ====================== diff --git a/Lib/test/test_wave.py b/Lib/test/test_wave.py index 5e771c8de969ec..6c3362857fc2ba 100644 --- a/Lib/test/test_wave.py +++ b/Lib/test/test_wave.py @@ -136,32 +136,6 @@ def test__all__(self): not_exported = {'WAVE_FORMAT_PCM', 'WAVE_FORMAT_EXTENSIBLE', 'KSDATAFORMAT_SUBTYPE_PCM'} support.check__all__(self, wave, not_exported=not_exported) - def test_read_deprecations(self): - filename = support.findfile('pluck-pcm8.wav', subdir='audiodata') - with wave.open(filename) as reader: - with self.assertWarns(DeprecationWarning): - with self.assertRaises(wave.Error): - reader.getmark('mark') - with self.assertWarns(DeprecationWarning): - self.assertIsNone(reader.getmarkers()) - - def test_write_deprecations(self): - with io.BytesIO(b'') as tmpfile: - with wave.open(tmpfile, 'wb') as writer: - writer.setnchannels(1) - writer.setsampwidth(1) - writer.setframerate(1) - writer.setcomptype('NONE', 'not compressed') - - with self.assertWarns(DeprecationWarning): - with self.assertRaises(wave.Error): - writer.setmark(0, 0, 'mark') - with self.assertWarns(DeprecationWarning): - with self.assertRaises(wave.Error): - writer.getmark('mark') - with self.assertWarns(DeprecationWarning): - self.assertIsNone(writer.getmarkers()) - class WaveLowLevelTest(unittest.TestCase): diff --git a/Lib/wave.py b/Lib/wave.py index a34af244c3e224..929609fa52409d 100644 --- a/Lib/wave.py +++ b/Lib/wave.py @@ -20,10 +20,6 @@ compression type ('not compressed' linear samples) getparams() -- returns a namedtuple consisting of all of the above in the above order - getmarkers() -- returns None (for compatibility with the - old aifc module) - getmark(id) -- raises an error since the mark does not - exist (for compatibility with the old aifc module) readframes(n) -- returns at most n frames of audio rewind() -- rewind to the beginning of the audio stream setpos(pos) -- seek to the specified position @@ -341,16 +337,6 @@ def getparams(self): self.getframerate(), self.getnframes(), self.getcomptype(), self.getcompname()) - def getmarkers(self): - import warnings - warnings._deprecated("Wave_read.getmarkers", remove=(3, 15)) - return None - - def getmark(self, id): - import warnings - warnings._deprecated("Wave_read.getmark", remove=(3, 15)) - raise Error('no marks') - def setpos(self, pos): if pos < 0 or pos > self._nframes: raise Error('position not in range') @@ -551,21 +537,6 @@ def getparams(self): return _wave_params(self._nchannels, self._sampwidth, self._framerate, self._nframes, self._comptype, self._compname) - def setmark(self, id, pos, name): - import warnings - warnings._deprecated("Wave_write.setmark", remove=(3, 15)) - raise Error('setmark() not supported') - - def getmark(self, id): - import warnings - warnings._deprecated("Wave_write.getmark", remove=(3, 15)) - raise Error('no marks') - - def getmarkers(self): - import warnings - warnings._deprecated("Wave_write.getmarkers", remove=(3, 15)) - return None - def tell(self): return self._nframeswritten diff --git a/Misc/NEWS.d/next/Library/2025-05-11-10-28-11.gh-issue-133873.H03nov.rst b/Misc/NEWS.d/next/Library/2025-05-11-10-28-11.gh-issue-133873.H03nov.rst new file mode 100644 index 00000000000000..79f630b2418e70 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-11-10-28-11.gh-issue-133873.H03nov.rst @@ -0,0 +1,3 @@ +Remove the deprecated ``getmark()``, ``setmark()`` and ``getmarkers()`` +methods of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` +classes, which were deprecated since Python 3.13. Patch by Bénédikt Tran. From 52a7a22a6b85a470e993b3399b0ee043e5c3596e Mon Sep 17 00:00:00 2001 From: Semyon Moroz Date: Thu, 15 May 2025 15:14:31 +0000 Subject: [PATCH 155/989] gh-77065: Use `putwch` instead of `putch` in `getpass.win_getpass` (#134058) --- Lib/getpass.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/getpass.py b/Lib/getpass.py index a4dbd2ea6502d4..1dd40e25e09068 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -119,9 +119,9 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): raise KeyboardInterrupt if c == '\b': if echo_char and pw: - msvcrt.putch('\b') - msvcrt.putch(' ') - msvcrt.putch('\b') + msvcrt.putwch('\b') + msvcrt.putwch(' ') + msvcrt.putwch('\b') pw = pw[:-1] else: pw = pw + c From 20095fb29ac3fd386eae2189ed725571192ae0af Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Thu, 15 May 2025 22:12:10 +0200 Subject: [PATCH 156/989] Fix GetNamedPipeHandleStateW on non-desktop Windows API partitions (GH-134049) --- Python/fileutils.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Python/fileutils.c b/Python/fileutils.c index 78603d40704f14..2a3f12d4e872f8 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2784,6 +2784,43 @@ _Py_set_blocking(int fd, int blocking) return -1; } #else /* MS_WINDOWS */ + +// The Windows Games API family doesn't expose GetNamedPipeHandleStateW so attempt +// to load it directly from the Kernel32.dll +#if !defined(MS_WINDOWS_APP) && !defined(MS_WINDOWS_SYSTEM) +BOOL +GetNamedPipeHandleStateW(HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount, + LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize) +{ + static int initialized = 0; + typedef BOOL(__stdcall* PGetNamedPipeHandleStateW) ( + HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount, + LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize); + static PGetNamedPipeHandleStateW _GetNamedPipeHandleStateW; + + if (initialized == 0) { + HMODULE api = LoadLibraryExW(L"Kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (api) { + _GetNamedPipeHandleStateW = (PGetNamedPipeHandleStateW)GetProcAddress( + api, "GetNamedPipeHandleStateW"); + } + else { + _GetNamedPipeHandleStateW = NULL; + } + initialized = 1; + } + + if (!_GetNamedPipeHandleStateW) { + SetLastError(E_NOINTERFACE); + return FALSE; + } + + return _GetNamedPipeHandleStateW( + hNamedPipe, lpState, lpCurInstances, lpMaxCollectionCount, lpCollectDataTimeout, lpUserName, nMaxUserNameSize + ); +} +#endif /* !MS_WINDOWS_APP && !MS_WINDOWS_SYSTEM */ + int _Py_get_blocking(int fd) { From 6a2296329117463fd09abc73656f1d7b48076100 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 15 May 2025 22:11:31 +0100 Subject: [PATCH 157/989] Improve the administrative install docs for Python Install Manager (GH-134066) --- Doc/using/windows.rst | 46 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 38b97a34fcda31..cd22148b531b84 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -98,8 +98,9 @@ does not affect any installs of Python runtimes. Uninstalling the Python install manager does not uninstall any Python runtimes. If you are not able to install an MSIX in your context, for example, you are -using automated deployment software that does not support it, please see -:ref:`pymanager-advancedinstall` below for more information. +using automated deployment software that does not support it, or are targeting +Windows Server 2019, please see :ref:`pymanager-advancedinstall` below for more +information. Basic Use @@ -515,6 +516,11 @@ per-machine installs to its default location in Program Files. It will attempt to modify the system :envvar:`PATH` environment variable to include this install location, but be sure to validate this on your configuration. +.. note:: + + Windows Server 2019 is the only version of Windows that CPython supports that + does not support MSIX. For Windows Server 2019, you should use the MSI. + Be aware that the MSI package does not bundle any runtimes, and so is not suitable for installs into offline environments without also creating an offline install index. See :ref:`pymanager-offline` and :ref:`pymanager-admin-config` @@ -535,13 +541,32 @@ depending on whether it was installed from python.org or through the Windows Store. Attempting to run the executable directly from Program Files is not recommended. -To programmatically install or uninstall the MSIX without using your -distribution platform's native support, the `Add-AppxPackage -`_ and -`Remove-AppxPackage `_ -PowerShell cmdlets are simplest to use: +To programmatically install the Python install manager, it is easiest to use +WinGet, which is included with all supported versions of Windows: -.. code:: +.. code-block:: powershell + + $> winget install 9NQ7512CXL7T -e --accept-package-agreements --disable-interactivity + + # Optionally run the configuration checker and accept all changes + $> py install --configure -y + +To download the Python install manager and install on another machine, the +following WinGet command will download the required files from the Store to your +Downloads directory (add ``-d `` to customize the output location). +This also generates a YAML file that appears to be unnecessary, as the +downloaded MSIX can be installed by launching or using the commands below. + +.. code-block:: powershell + + $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --disable-interactivity + +To programmatically install or uninstall an MSIX using only PowerShell, the +`Add-AppxPackage `_ +and `Remove-AppxPackage `_ +PowerShell cmdlets are recommended: + +.. code-block:: powershell $> Add-AppxPackage C:\Downloads\python-manager-25.0.msix ... @@ -555,6 +580,11 @@ to install the Python install manager for all users from the MSIX package. Users will still need to install their own copies of Python itself, as there is no way to trigger those installs without being a logged in user. +Note that the MSIX downloadable from the Store and from the Python website are +subtly different and cannot be installed at the same time. Wherever possible, +we suggest using the above commands to download the package from the Store to +reduce the risk of setting up conflicting installs. + .. _pymanager-admin-config: Administrative Configuration From 7a504b3d5da98874536834481539c19ba4a265af Mon Sep 17 00:00:00 2001 From: AN Long Date: Fri, 16 May 2025 08:00:06 +0900 Subject: [PATCH 158/989] gh-130000: Release the GIL in winreg when doing Windows API calls (GH-130001) --- PC/winreg.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/PC/winreg.c b/PC/winreg.c index a8e2e4cc10c882..d1a1c3d1c97850 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -426,7 +426,9 @@ PyHKEY_Close(winreg_state *st, PyObject *ob_handle) if (PyHKEY_Check(st, ob_handle)) { ((PyHKEYObject*)ob_handle)->hkey = 0; } + Py_BEGIN_ALLOW_THREADS rc = key ? RegCloseKey(key) : ERROR_SUCCESS; + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); return rc == ERROR_SUCCESS; @@ -499,14 +501,21 @@ PyWinObject_CloseHKEY(winreg_state *st, PyObject *obHandle) } #if SIZEOF_LONG >= SIZEOF_HKEY else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + long rc; + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); } #else else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsVoidPtr(obHandle)); + long rc; + HKEY hkey = (HKEY)PyLong_AsVoidPtr(obHandle); + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey(hkey); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); @@ -924,7 +933,9 @@ winreg_CreateKey_impl(PyObject *module, HKEY key, const wchar_t *sub_key) (Py_ssize_t)KEY_WRITE) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyW(key, sub_key, &retKey); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKey"); return NULL; @@ -973,8 +984,10 @@ winreg_CreateKeyEx_impl(PyObject *module, HKEY key, const wchar_t *sub_key, (Py_ssize_t)access) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyExW(key, sub_key, reserved, NULL, 0, access, NULL, &retKey, NULL); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKeyEx"); return NULL; @@ -1187,10 +1200,12 @@ winreg_EnumValue_impl(PyObject *module, HKEY key, int index) (Py_ssize_t)key, index) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, - &retValueSize, &retDataSize, NULL, NULL)) - != ERROR_SUCCESS) + + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &retValueSize, &retDataSize, NULL, NULL); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); ++retValueSize; /* include null terminators */ @@ -1477,9 +1492,11 @@ winreg_QueryInfoKey_impl(PyObject *module, HKEY key) if (PySys_Audit("winreg.QueryInfoKey", "n", (Py_ssize_t)key) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, - &nValues, NULL, NULL, NULL, &ft)) - != ERROR_SUCCESS) { + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, + &nValues, NULL, NULL, NULL, &ft); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) { return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); } li.LowPart = ft.dwLowDateTime; @@ -1587,7 +1604,9 @@ winreg_QueryValue_impl(PyObject *module, HKEY key, const wchar_t *sub_key) PyMem_Free(pbuf); } if (childKey != key) { + Py_BEGIN_ALLOW_THREADS RegCloseKey(childKey); + Py_END_ALLOW_THREADS } return result; } @@ -1625,7 +1644,9 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) (Py_ssize_t)key, NULL, name) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, NULL, NULL, &bufSize); + Py_END_ALLOW_THREADS if (rc == ERROR_MORE_DATA) bufSize = 256; else if (rc != ERROR_SUCCESS) @@ -1637,8 +1658,10 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) while (1) { retSize = bufSize; + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, &typ, (BYTE *)retBuf, &retSize); + Py_END_ALLOW_THREADS if (rc != ERROR_MORE_DATA) break; From 1566c34dc76ec6139e6827fbab6d76e084a63d9d Mon Sep 17 00:00:00 2001 From: aeiouaeiouaeiouaeiouaeiouaeiou Date: Fri, 16 May 2025 13:23:11 +0300 Subject: [PATCH 159/989] gh-134069: bump HACL* revision to incoporate `memset_s` (#134027) Bumps the HACL* revision to include recent revisions that corrects issues building with legacy/cross-platform macOS SDKs. Signed-off-by: aeiouaeiouaeiouaeiouaeiouaeiou --- Misc/sbom.spdx.json | 52 +- Modules/_hacl/Hacl_Hash_Blake2b.c | 52 +- Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c | 52 +- Modules/_hacl/Hacl_Hash_Blake2s.c | 52 +- Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c | 52 +- Modules/_hacl/Hacl_Hash_MD5.c | 523 +++++++----------- Modules/_hacl/Hacl_Hash_SHA1.c | 12 +- Modules/_hacl/Hacl_Hash_SHA2.c | 42 +- Modules/_hacl/Hacl_Hash_SHA3.c | 22 +- Modules/_hacl/Hacl_Streaming_HMAC.c | 127 ++--- Modules/_hacl/Lib_Memzero0.c | 20 +- .../include/krml/FStar_UInt128_Verified.h | 45 +- Modules/_hacl/refresh.sh | 2 +- 13 files changed, 405 insertions(+), 648 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 4a697d047ca6e4..10a5c646d2a59e 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -314,11 +314,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4b6e7696e8d84f322fb24b1fbb08ccb9b0e7d51b" + "checksumValue": "808af7ff8a2cb2b4ef3a9ce3dbfef58d90828c9f" }, { "algorithm": "SHA256", - "checksumValue": "50a65a34a7a7569eedf7fa864a7892eeee5840a7fdf6fa8f1e87d42c65f6c877" + "checksumValue": "6a492aa586f2d10b1b300ce8ce4c72c976ff7548fee667aded2253f99969ac87" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" @@ -342,11 +342,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0f75e44a42775247a46acc2beaa6bae8f199a3d9" + "checksumValue": "1bb072f2be9e5d194274fdcc87825cb094e4b32e" }, { "algorithm": "SHA256", - "checksumValue": "03b612c24193464ed6848aeebbf44f9266b78ec6eed2486056211cde8992c49a" + "checksumValue": "9eb22953ce60dde9dc970fec9dfce9d94235f4b7ccd8f0151cad4707dc835d1d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" @@ -384,11 +384,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "65bf44140691b046dcfed3ab1576dbf8bbf96dc5" + "checksumValue": "cdd6e9ca86dbede92d1a47b9224d2af70c599a71" }, { "algorithm": "SHA256", - "checksumValue": "0f98959dafffce039ade9d296f7a05bed151c9c512498f48e4b326a5523a240b" + "checksumValue": "f3204f3e60734d811b6630f879b69ce54eaf367f3fca5889c1026e7a1f3ee1e4" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" @@ -412,11 +412,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0da9782455923aede8d8dce9dfdc38f4fc1de572" + "checksumValue": "4056bb6e3ed184400d1610bdfd4260b3fd05ccbb" }, { "algorithm": "SHA256", - "checksumValue": "2d17ae768fd3d7d6decddd8b4aaf23ce02a809ee62bb98da32c8a7f54acf92d0" + "checksumValue": "c93746df2f219cbb1634ee6fb0ab1c4cbd381d1f36c637114c68346c9935326d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" @@ -454,11 +454,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "38e8d96ef1879480780494058a93cec181f8d6d7" + "checksumValue": "61f678cd9234c6eab5d4409ae66f470b068b959b" }, { "algorithm": "SHA256", - "checksumValue": "61e77d2063cf60c96e9ce06af215efe5d42c43026833bffed5732326fe97ed1e" + "checksumValue": "5b91ed0339074e2e546119833398e2cdb7190c33a59c405bf43b2417c789547d" } ], "fileName": "Modules/_hacl/Hacl_Hash_MD5.c" @@ -482,11 +482,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "986dd5ba0b34d15f3e5e5c656979aea1b502e8aa" + "checksumValue": "c7fc5c9721caf37c5a24c9beff27b0ac2ed68cc9" }, { "algorithm": "SHA256", - "checksumValue": "38d5f1f2e67a0eb30789f81fc56c07a6e7246e2b1be6c65485bcca1dcd0e0806" + "checksumValue": "ce08721d491f3b8a9bd4cde6c27bfcc8fc01471512ccca4bd3c0b764cb551d29" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA1.c" @@ -510,11 +510,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "f732a6710fe3e13cd28130f0f20504e347d1c412" + "checksumValue": "fffe8c4f67669ac8ccd87c2e0f95db2427481df1" }, { "algorithm": "SHA256", - "checksumValue": "86cf32e4d1f3ba93a94108271923fdafe2204447792a918acf4a2250f352dbde" + "checksumValue": "f8af382de7e29a73c726f9c70770498ddd99e2c4702489ed6e634f0b68597c95" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA2.c" @@ -538,11 +538,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "50f75337b31f509b5bfcc7ebb3d066b82a0f1b33" + "checksumValue": "77d3d879dfa5949030bca0e8ee75d3d369ec54a7" }, { "algorithm": "SHA256", - "checksumValue": "c9e1442899e5b902fa39f413f1a3131f7ab5c2283d5100dc8ac675a7d5ebbdf1" + "checksumValue": "8744f5b6e054c3e5c44f413a60f9154b76dd7230135dcee26fd063755ec64be1" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.c" @@ -566,11 +566,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "8140310f505bb2619a749777a91487d666237bcf" + "checksumValue": "417e68ac8498cb2f93a06a19003ea1cc3f0f6753" }, { "algorithm": "SHA256", - "checksumValue": "9d95e6a651c22185d9b7c38f363d30159f810e6fcdc2208f29492837ed891e82" + "checksumValue": "843db4bba78a476d4d53dabe4eed5c9235f490ccc9fdaf19e22af488af858920" } ], "fileName": "Modules/_hacl/Hacl_Streaming_HMAC.c" @@ -608,11 +608,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c9651ef21479c4d8a3b04c5baa1902866dbb1cdf" + "checksumValue": "0a0b7f3714167ad45ddf5a6a48d76f525a119c9c" }, { "algorithm": "SHA256", - "checksumValue": "e039c82ba670606ca111573942baad800f75da467abbc74cd7d1fe175ebcdfaf" + "checksumValue": "135d4afb4812468885c963c9c87a55ba5fae9181df4431af5fbad08588dda229" } ], "fileName": "Modules/_hacl/Lib_Memzero0.c" @@ -622,11 +622,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "eaa543c778300238dc23034aafeada0951154af1" + "checksumValue": "911c97a0f24067635b164fbca49da76055f192cd" }, { "algorithm": "SHA256", - "checksumValue": "3fd2552d527a23110d61ad2811c774810efb1eaee008f136c2a0d609daa77f5b" + "checksumValue": "972b5111ebada8e11dd60df3119da1af505fd3e0b6c782ead6cab7f1daf134f1" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt128_Verified.h" @@ -1752,14 +1752,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "02dfcf0c79d488b120d7f2c2a0f9206301c7927ed5106545e0b6f2aef88da76a" + "checksumValue": "39f6fd4f2fe98aecc995a2fd980fae0e0835cef6e563ebbf25f69d3d3102bafd" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/7720f6d4fc0468a99d5ea6120976bcc271e42727.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/4ef25b547b377dcef855db4289c6a00580e7221c.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:7720f6d4fc0468a99d5ea6120976bcc271e42727:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:4ef25b547b377dcef855db4289c6a00580e7221c:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1767,7 +1767,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "7720f6d4fc0468a99d5ea6120976bcc271e42727" + "versionInfo": "4ef25b547b377dcef855db4289c6a00580e7221c" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index 21ab2b88c799a6..a5b75d61798949 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -544,11 +544,9 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -860,14 +858,10 @@ static Hacl_Hash_Blake2b_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1059,11 +1053,9 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1200,8 +1192,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1265,8 +1256,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1296,8 +1286,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state10, @@ -1359,8 +1348,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1690,14 +1678,10 @@ Hacl_Hash_Blake2b_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index c4d9b4a689d28f..f955f1c6115b1e 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -274,11 +274,9 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -746,14 +744,10 @@ static Hacl_Hash_Blake2b_Simd256_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -936,11 +930,9 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1075,8 +1067,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1140,8 +1131,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1171,8 +1161,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state10, @@ -1234,8 +1223,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1578,14 +1566,10 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 730ba135afb2fb..0d4fcc7f3951fc 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -543,13 +543,13 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -846,16 +846,14 @@ static Hacl_Hash_Blake2s_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1042,13 +1040,13 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1182,8 +1180,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1237,8 +1234,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1268,8 +1264,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state10, @@ -1321,8 +1316,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1639,16 +1633,14 @@ Hacl_Hash_Blake2s_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index 7e9cd79544f8f1..fa46be045f3441 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -271,13 +271,13 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -736,16 +736,14 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -923,13 +921,13 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1061,8 +1059,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1116,8 +1113,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1147,8 +1143,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state10, @@ -1200,8 +1195,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1531,16 +1525,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_MD5.c b/Modules/_hacl/Hacl_Hash_MD5.c index 75ce8d2926e6e1..305c75483c06ea 100644 --- a/Modules/_hacl/Hacl_Hash_MD5.c +++ b/Modules/_hacl/Hacl_Hash_MD5.c @@ -66,11 +66,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti0 = _t[0U]; uint32_t v = - vb0 - + - ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) - << 7U - | (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); + vb0 + + ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) << 7U | + (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); abcd[0U] = v; uint32_t va0 = abcd[3U]; uint32_t vb1 = abcd[0U]; @@ -82,11 +80,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti1 = _t[1U]; uint32_t v0 = - vb1 - + - ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) - << 12U - | (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); + vb1 + + ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) << 12U | + (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); abcd[3U] = v0; uint32_t va1 = abcd[2U]; uint32_t vb2 = abcd[3U]; @@ -98,11 +94,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti2 = _t[2U]; uint32_t v1 = - vb2 - + - ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) - << 17U - | (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); + vb2 + + ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) << 17U | + (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); abcd[2U] = v1; uint32_t va2 = abcd[1U]; uint32_t vb3 = abcd[2U]; @@ -114,11 +108,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti3 = _t[3U]; uint32_t v2 = - vb3 - + - ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) - << 22U - | (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); + vb3 + + ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) << 22U | + (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); abcd[1U] = v2; uint32_t va3 = abcd[0U]; uint32_t vb4 = abcd[1U]; @@ -130,11 +122,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti4 = _t[4U]; uint32_t v3 = - vb4 - + - ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) - << 7U - | (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); + vb4 + + ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) << 7U | + (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); abcd[0U] = v3; uint32_t va4 = abcd[3U]; uint32_t vb5 = abcd[0U]; @@ -146,11 +136,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti5 = _t[5U]; uint32_t v4 = - vb5 - + - ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) - << 12U - | (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); + vb5 + + ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) << 12U | + (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); abcd[3U] = v4; uint32_t va5 = abcd[2U]; uint32_t vb6 = abcd[3U]; @@ -162,11 +150,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti6 = _t[6U]; uint32_t v5 = - vb6 - + - ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) - << 17U - | (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); + vb6 + + ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) << 17U | + (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); abcd[2U] = v5; uint32_t va6 = abcd[1U]; uint32_t vb7 = abcd[2U]; @@ -178,11 +164,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti7 = _t[7U]; uint32_t v6 = - vb7 - + - ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) - << 22U - | (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); + vb7 + + ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) << 22U | + (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); abcd[1U] = v6; uint32_t va7 = abcd[0U]; uint32_t vb8 = abcd[1U]; @@ -194,11 +178,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti8 = _t[8U]; uint32_t v7 = - vb8 - + - ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) - << 7U - | (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); + vb8 + + ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) << 7U | + (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); abcd[0U] = v7; uint32_t va8 = abcd[3U]; uint32_t vb9 = abcd[0U]; @@ -210,11 +192,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti9 = _t[9U]; uint32_t v8 = - vb9 - + - ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) - << 12U - | (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); + vb9 + + ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) << 12U | + (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); abcd[3U] = v8; uint32_t va9 = abcd[2U]; uint32_t vb10 = abcd[3U]; @@ -226,11 +206,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti10 = _t[10U]; uint32_t v9 = - vb10 - + - ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) - << 17U - | (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); + vb10 + + ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) << 17U | + (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); abcd[2U] = v9; uint32_t va10 = abcd[1U]; uint32_t vb11 = abcd[2U]; @@ -242,11 +220,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti11 = _t[11U]; uint32_t v10 = - vb11 - + - ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) - << 22U - | (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); + vb11 + + ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) << 22U | + (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); abcd[1U] = v10; uint32_t va11 = abcd[0U]; uint32_t vb12 = abcd[1U]; @@ -258,11 +234,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti12 = _t[12U]; uint32_t v11 = - vb12 - + - ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) - << 7U - | (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); + vb12 + + ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) << 7U | + (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); abcd[0U] = v11; uint32_t va12 = abcd[3U]; uint32_t vb13 = abcd[0U]; @@ -274,11 +248,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti13 = _t[13U]; uint32_t v12 = - vb13 - + - ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) - << 12U - | (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); + vb13 + + ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) << 12U | + (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); abcd[3U] = v12; uint32_t va13 = abcd[2U]; uint32_t vb14 = abcd[3U]; @@ -290,11 +262,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti14 = _t[14U]; uint32_t v13 = - vb14 - + - ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) - << 17U - | (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); + vb14 + + ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) << 17U | + (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); abcd[2U] = v13; uint32_t va14 = abcd[1U]; uint32_t vb15 = abcd[2U]; @@ -306,11 +276,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti15 = _t[15U]; uint32_t v14 = - vb15 - + - ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) - << 22U - | (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); + vb15 + + ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) << 22U | + (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); abcd[1U] = v14; uint32_t va15 = abcd[0U]; uint32_t vb16 = abcd[1U]; @@ -322,11 +290,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti16 = _t[16U]; uint32_t v15 = - vb16 - + - ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) - << 5U - | (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); + vb16 + + ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) << 5U | + (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); abcd[0U] = v15; uint32_t va16 = abcd[3U]; uint32_t vb17 = abcd[0U]; @@ -338,11 +304,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti17 = _t[17U]; uint32_t v16 = - vb17 - + - ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) - << 9U - | (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); + vb17 + + ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) << 9U | + (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); abcd[3U] = v16; uint32_t va17 = abcd[2U]; uint32_t vb18 = abcd[3U]; @@ -354,11 +318,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti18 = _t[18U]; uint32_t v17 = - vb18 - + - ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) - << 14U - | (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); + vb18 + + ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) << 14U | + (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); abcd[2U] = v17; uint32_t va18 = abcd[1U]; uint32_t vb19 = abcd[2U]; @@ -370,11 +332,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti19 = _t[19U]; uint32_t v18 = - vb19 - + - ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) - << 20U - | (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); + vb19 + + ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) << 20U | + (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); abcd[1U] = v18; uint32_t va19 = abcd[0U]; uint32_t vb20 = abcd[1U]; @@ -386,11 +346,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti20 = _t[20U]; uint32_t v19 = - vb20 - + - ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) - << 5U - | (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); + vb20 + + ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) << 5U | + (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); abcd[0U] = v19; uint32_t va20 = abcd[3U]; uint32_t vb21 = abcd[0U]; @@ -402,11 +360,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti21 = _t[21U]; uint32_t v20 = - vb21 - + - ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) - << 9U - | (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); + vb21 + + ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) << 9U | + (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); abcd[3U] = v20; uint32_t va21 = abcd[2U]; uint32_t vb22 = abcd[3U]; @@ -418,11 +374,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti22 = _t[22U]; uint32_t v21 = - vb22 - + - ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) - << 14U - | (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); + vb22 + + ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) << 14U | + (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); abcd[2U] = v21; uint32_t va22 = abcd[1U]; uint32_t vb23 = abcd[2U]; @@ -434,11 +388,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti23 = _t[23U]; uint32_t v22 = - vb23 - + - ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) - << 20U - | (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); + vb23 + + ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) << 20U | + (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); abcd[1U] = v22; uint32_t va23 = abcd[0U]; uint32_t vb24 = abcd[1U]; @@ -450,11 +402,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti24 = _t[24U]; uint32_t v23 = - vb24 - + - ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) - << 5U - | (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); + vb24 + + ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) << 5U | + (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); abcd[0U] = v23; uint32_t va24 = abcd[3U]; uint32_t vb25 = abcd[0U]; @@ -466,11 +416,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti25 = _t[25U]; uint32_t v24 = - vb25 - + - ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) - << 9U - | (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); + vb25 + + ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) << 9U | + (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); abcd[3U] = v24; uint32_t va25 = abcd[2U]; uint32_t vb26 = abcd[3U]; @@ -482,11 +430,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti26 = _t[26U]; uint32_t v25 = - vb26 - + - ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) - << 14U - | (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); + vb26 + + ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) << 14U | + (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); abcd[2U] = v25; uint32_t va26 = abcd[1U]; uint32_t vb27 = abcd[2U]; @@ -498,11 +444,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti27 = _t[27U]; uint32_t v26 = - vb27 - + - ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) - << 20U - | (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); + vb27 + + ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) << 20U | + (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); abcd[1U] = v26; uint32_t va27 = abcd[0U]; uint32_t vb28 = abcd[1U]; @@ -514,11 +458,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti28 = _t[28U]; uint32_t v27 = - vb28 - + - ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) - << 5U - | (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); + vb28 + + ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) << 5U | + (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); abcd[0U] = v27; uint32_t va28 = abcd[3U]; uint32_t vb29 = abcd[0U]; @@ -530,11 +472,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti29 = _t[29U]; uint32_t v28 = - vb29 - + - ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) - << 9U - | (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); + vb29 + + ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) << 9U | + (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); abcd[3U] = v28; uint32_t va29 = abcd[2U]; uint32_t vb30 = abcd[3U]; @@ -546,11 +486,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti30 = _t[30U]; uint32_t v29 = - vb30 - + - ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) - << 14U - | (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); + vb30 + + ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) << 14U | + (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); abcd[2U] = v29; uint32_t va30 = abcd[1U]; uint32_t vb31 = abcd[2U]; @@ -562,11 +500,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti31 = _t[31U]; uint32_t v30 = - vb31 - + - ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) - << 20U - | (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); + vb31 + + ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) << 20U | + (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); abcd[1U] = v30; uint32_t va31 = abcd[0U]; uint32_t vb32 = abcd[1U]; @@ -578,11 +514,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti32 = _t[32U]; uint32_t v31 = - vb32 - + - ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) - << 4U - | (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); + vb32 + + ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) << 4U | + (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); abcd[0U] = v31; uint32_t va32 = abcd[3U]; uint32_t vb33 = abcd[0U]; @@ -594,11 +528,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti33 = _t[33U]; uint32_t v32 = - vb33 - + - ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) - << 11U - | (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); + vb33 + + ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) << 11U | + (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); abcd[3U] = v32; uint32_t va33 = abcd[2U]; uint32_t vb34 = abcd[3U]; @@ -610,11 +542,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti34 = _t[34U]; uint32_t v33 = - vb34 - + - ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) - << 16U - | (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); + vb34 + + ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) << 16U | + (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); abcd[2U] = v33; uint32_t va34 = abcd[1U]; uint32_t vb35 = abcd[2U]; @@ -626,11 +556,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti35 = _t[35U]; uint32_t v34 = - vb35 - + - ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) - << 23U - | (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); + vb35 + + ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) << 23U | + (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); abcd[1U] = v34; uint32_t va35 = abcd[0U]; uint32_t vb36 = abcd[1U]; @@ -642,11 +570,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti36 = _t[36U]; uint32_t v35 = - vb36 - + - ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) - << 4U - | (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); + vb36 + + ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) << 4U | + (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); abcd[0U] = v35; uint32_t va36 = abcd[3U]; uint32_t vb37 = abcd[0U]; @@ -658,11 +584,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti37 = _t[37U]; uint32_t v36 = - vb37 - + - ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) - << 11U - | (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); + vb37 + + ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) << 11U | + (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); abcd[3U] = v36; uint32_t va37 = abcd[2U]; uint32_t vb38 = abcd[3U]; @@ -674,11 +598,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti38 = _t[38U]; uint32_t v37 = - vb38 - + - ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) - << 16U - | (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); + vb38 + + ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) << 16U | + (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); abcd[2U] = v37; uint32_t va38 = abcd[1U]; uint32_t vb39 = abcd[2U]; @@ -690,11 +612,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti39 = _t[39U]; uint32_t v38 = - vb39 - + - ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) - << 23U - | (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); + vb39 + + ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) << 23U | + (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); abcd[1U] = v38; uint32_t va39 = abcd[0U]; uint32_t vb40 = abcd[1U]; @@ -706,11 +626,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti40 = _t[40U]; uint32_t v39 = - vb40 - + - ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) - << 4U - | (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); + vb40 + + ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) << 4U | + (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); abcd[0U] = v39; uint32_t va40 = abcd[3U]; uint32_t vb41 = abcd[0U]; @@ -722,11 +640,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti41 = _t[41U]; uint32_t v40 = - vb41 - + - ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) - << 11U - | (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); + vb41 + + ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) << 11U | + (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); abcd[3U] = v40; uint32_t va41 = abcd[2U]; uint32_t vb42 = abcd[3U]; @@ -738,11 +654,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti42 = _t[42U]; uint32_t v41 = - vb42 - + - ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) - << 16U - | (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); + vb42 + + ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) << 16U | + (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); abcd[2U] = v41; uint32_t va42 = abcd[1U]; uint32_t vb43 = abcd[2U]; @@ -754,11 +668,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti43 = _t[43U]; uint32_t v42 = - vb43 - + - ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) - << 23U - | (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); + vb43 + + ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) << 23U | + (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); abcd[1U] = v42; uint32_t va43 = abcd[0U]; uint32_t vb44 = abcd[1U]; @@ -770,11 +682,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti44 = _t[44U]; uint32_t v43 = - vb44 - + - ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) - << 4U - | (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); + vb44 + + ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) << 4U | + (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); abcd[0U] = v43; uint32_t va44 = abcd[3U]; uint32_t vb45 = abcd[0U]; @@ -786,11 +696,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti45 = _t[45U]; uint32_t v44 = - vb45 - + - ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) - << 11U - | (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); + vb45 + + ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) << 11U | + (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); abcd[3U] = v44; uint32_t va45 = abcd[2U]; uint32_t vb46 = abcd[3U]; @@ -802,11 +710,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti46 = _t[46U]; uint32_t v45 = - vb46 - + - ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) - << 16U - | (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); + vb46 + + ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) << 16U | + (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); abcd[2U] = v45; uint32_t va46 = abcd[1U]; uint32_t vb47 = abcd[2U]; @@ -818,11 +724,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti47 = _t[47U]; uint32_t v46 = - vb47 - + - ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) - << 23U - | (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); + vb47 + + ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) << 23U | + (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); abcd[1U] = v46; uint32_t va47 = abcd[0U]; uint32_t vb48 = abcd[1U]; @@ -834,11 +738,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti48 = _t[48U]; uint32_t v47 = - vb48 - + - ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) - << 6U - | (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); + vb48 + + ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) << 6U | + (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); abcd[0U] = v47; uint32_t va48 = abcd[3U]; uint32_t vb49 = abcd[0U]; @@ -850,11 +752,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti49 = _t[49U]; uint32_t v48 = - vb49 - + - ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) - << 10U - | (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); + vb49 + + ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) << 10U | + (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); abcd[3U] = v48; uint32_t va49 = abcd[2U]; uint32_t vb50 = abcd[3U]; @@ -866,11 +766,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti50 = _t[50U]; uint32_t v49 = - vb50 - + - ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) - << 15U - | (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); + vb50 + + ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) << 15U | + (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); abcd[2U] = v49; uint32_t va50 = abcd[1U]; uint32_t vb51 = abcd[2U]; @@ -882,11 +780,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti51 = _t[51U]; uint32_t v50 = - vb51 - + - ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) - << 21U - | (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); + vb51 + + ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) << 21U | + (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); abcd[1U] = v50; uint32_t va51 = abcd[0U]; uint32_t vb52 = abcd[1U]; @@ -898,11 +794,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti52 = _t[52U]; uint32_t v51 = - vb52 - + - ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) - << 6U - | (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); + vb52 + + ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) << 6U | + (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); abcd[0U] = v51; uint32_t va52 = abcd[3U]; uint32_t vb53 = abcd[0U]; @@ -914,11 +808,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti53 = _t[53U]; uint32_t v52 = - vb53 - + - ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) - << 10U - | (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); + vb53 + + ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) << 10U | + (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); abcd[3U] = v52; uint32_t va53 = abcd[2U]; uint32_t vb54 = abcd[3U]; @@ -930,11 +822,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti54 = _t[54U]; uint32_t v53 = - vb54 - + - ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) - << 15U - | (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); + vb54 + + ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) << 15U | + (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); abcd[2U] = v53; uint32_t va54 = abcd[1U]; uint32_t vb55 = abcd[2U]; @@ -946,11 +836,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti55 = _t[55U]; uint32_t v54 = - vb55 - + - ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) - << 21U - | (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); + vb55 + + ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) << 21U | + (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); abcd[1U] = v54; uint32_t va55 = abcd[0U]; uint32_t vb56 = abcd[1U]; @@ -962,11 +850,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti56 = _t[56U]; uint32_t v55 = - vb56 - + - ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) - << 6U - | (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); + vb56 + + ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) << 6U | + (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); abcd[0U] = v55; uint32_t va56 = abcd[3U]; uint32_t vb57 = abcd[0U]; @@ -978,11 +864,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti57 = _t[57U]; uint32_t v56 = - vb57 - + - ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) - << 10U - | (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); + vb57 + + ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) << 10U | + (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); abcd[3U] = v56; uint32_t va57 = abcd[2U]; uint32_t vb58 = abcd[3U]; @@ -994,11 +878,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti58 = _t[58U]; uint32_t v57 = - vb58 - + - ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) - << 15U - | (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); + vb58 + + ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) << 15U | + (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); abcd[2U] = v57; uint32_t va58 = abcd[1U]; uint32_t vb59 = abcd[2U]; @@ -1010,11 +892,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti59 = _t[59U]; uint32_t v58 = - vb59 - + - ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) - << 21U - | (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); + vb59 + + ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) << 21U | + (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); abcd[1U] = v58; uint32_t va59 = abcd[0U]; uint32_t vb60 = abcd[1U]; @@ -1026,11 +906,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti60 = _t[60U]; uint32_t v59 = - vb60 - + - ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) - << 6U - | (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); + vb60 + + ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) << 6U | + (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); abcd[0U] = v59; uint32_t va60 = abcd[3U]; uint32_t vb61 = abcd[0U]; @@ -1042,11 +920,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti61 = _t[61U]; uint32_t v60 = - vb61 - + - ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) - << 10U - | (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); + vb61 + + ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) << 10U | + (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); abcd[3U] = v60; uint32_t va61 = abcd[2U]; uint32_t vb62 = abcd[3U]; @@ -1058,11 +934,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti62 = _t[62U]; uint32_t v61 = - vb62 - + - ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) - << 15U - | (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); + vb62 + + ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) << 15U | + (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); abcd[2U] = v61; uint32_t va62 = abcd[1U]; uint32_t vb = abcd[2U]; @@ -1074,11 +948,8 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti = _t[63U]; uint32_t v62 = - vb - + - ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) - << 21U - | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); + vb + + ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) << 21U | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); abcd[1U] = v62; uint32_t a = abcd[0U]; uint32_t b = abcd[1U]; @@ -1282,8 +1153,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1328,8 +1198,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1359,8 +1228,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -1403,8 +1271,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA1.c b/Modules/_hacl/Hacl_Hash_SHA1.c index 508e447bf275da..97bd4f2204cf24 100644 --- a/Modules/_hacl/Hacl_Hash_SHA1.c +++ b/Modules/_hacl/Hacl_Hash_SHA1.c @@ -315,8 +315,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -361,8 +360,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -392,8 +390,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -436,8 +433,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA2.c b/Modules/_hacl/Hacl_Hash_SHA2.c index d612bafa72cdc4..d2ee0c9ef51721 100644 --- a/Modules/_hacl/Hacl_Hash_SHA2.c +++ b/Modules/_hacl/Hacl_Hash_SHA2.c @@ -100,15 +100,14 @@ static inline void sha256_update(uint8_t *b, uint32_t *hash) uint32_t k_e_t = k_t; uint32_t t1 = - h02 - + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint32_t t2 = - ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint32_t a1 = t1 + t2; uint32_t b1 = a0; uint32_t c1 = b0; @@ -301,15 +300,14 @@ static inline void sha512_update(uint8_t *b, uint64_t *hash) uint64_t k_e_t = k_t; uint64_t t1 = - h02 - + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint64_t t2 = - ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint64_t a1 = t1 + t2; uint64_t b1 = a0; uint64_t c1 = b0; @@ -639,8 +637,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -685,8 +682,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -716,8 +712,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -760,8 +755,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1205,8 +1199,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1251,8 +1244,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1282,8 +1274,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state10, @@ -1326,8 +1317,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA3.c b/Modules/_hacl/Hacl_Hash_SHA3.c index 87638df9549fbb..466d2b96c0cdfa 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.c +++ b/Modules/_hacl/Hacl_Hash_SHA3.c @@ -866,8 +866,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ((Hacl_Hash_SHA3_state_t){ .block_state = block_state1, .buf = buf, .total_len = total_len2 }); } else if (sz == 0U) @@ -910,8 +909,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -941,8 +939,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state10, @@ -972,10 +969,8 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(i) - == 0ULL - && (uint64_t)(chunk_len - diff) > 0ULL + (uint64_t)(chunk_len - diff) % (uint64_t)block_len(i) == 0ULL && + (uint64_t)(chunk_len - diff) > 0ULL ) { ite = block_len(i); @@ -994,8 +989,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -2422,9 +2416,7 @@ Hacl_Hash_SHA3_shake128_squeeze_nblocks( 5U, 1U, _C[i] = - state[i - + 0U] - ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); KRML_MAYBE_FOR5(i2, 0U, 5U, diff --git a/Modules/_hacl/Hacl_Streaming_HMAC.c b/Modules/_hacl/Hacl_Streaming_HMAC.c index d28b39792af576..8dd7e2c0bf3e71 100644 --- a/Modules/_hacl/Hacl_Streaming_HMAC.c +++ b/Modules/_hacl/Hacl_Streaming_HMAC.c @@ -198,8 +198,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); + st[0U] = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); } if (st == NULL) { @@ -220,8 +219,8 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); + st[0U] = + ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); } if (st == NULL) { @@ -242,8 +241,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_224_a, @@ -270,8 +268,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_256_a, @@ -298,8 +295,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_384_a, @@ -326,8 +322,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_512_a, @@ -354,8 +349,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_224_a, @@ -382,8 +376,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_256_a, @@ -410,8 +403,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_384_a, @@ -438,8 +430,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_512_a, @@ -466,8 +457,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_a, @@ -495,8 +485,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_128_a, @@ -531,8 +520,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_a, @@ -560,8 +548,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_256_a, @@ -2059,8 +2046,8 @@ Hacl_Streaming_HMAC_update( Hacl_Streaming_HMAC_Definitions_index i1 = Hacl_Streaming_HMAC_index_of_state(block_state); if ( - (uint64_t)chunk_len - > max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len + (uint64_t)chunk_len > + max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len ) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -2068,9 +2055,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2079,8 +2064,8 @@ Hacl_Streaming_HMAC_update( else { sz = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (chunk_len <= block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - sz) { @@ -2091,9 +2076,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2102,14 +2085,13 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2127,9 +2109,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2138,8 +2118,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2153,8 +2133,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)chunk_len > 0ULL ) @@ -2164,8 +2144,8 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = (chunk_len - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); @@ -2178,8 +2158,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2200,9 +2179,8 @@ Hacl_Streaming_HMAC_update( uint32_t sz10; if ( - total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len10 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == + 0ULL && total_len10 > 0ULL ) { @@ -2211,14 +2189,13 @@ Hacl_Streaming_HMAC_update( else { sz10 = - (uint32_t)(total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len10 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state10, @@ -2233,9 +2210,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2244,8 +2219,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2259,8 +2234,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL ) @@ -2270,13 +2245,12 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = - (chunk_len - diff - ite) - / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); + (chunk_len - diff - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data1_len = n_blocks * block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data2_len = chunk_len - diff - data1_len; @@ -2286,8 +2260,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2324,9 +2297,7 @@ Hacl_Streaming_HMAC_digest( uint32_t r; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2335,8 +2306,8 @@ Hacl_Streaming_HMAC_digest( else { r = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf_1 = buf_; Hacl_Agile_Hash_state_s *s110 = malloc_(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)); diff --git a/Modules/_hacl/Lib_Memzero0.c b/Modules/_hacl/Lib_Memzero0.c index 28abd1aa4e2d54..f94e0e2254a912 100644 --- a/Modules/_hacl/Lib_Memzero0.c +++ b/Modules/_hacl/Lib_Memzero0.c @@ -11,18 +11,18 @@ #if defined(__APPLE__) && defined(__MACH__) #include // memset_s is available from macOS 10.9, iOS 7, watchOS 2, and on all tvOS and visionOS versions. -# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && (MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9)) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) -# define APPLE_HAS_MEMSET_S 1 +# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_X_VERSION_10_9) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9)) +# define APPLE_HAS_MEMSET_S +# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && defined(__IPHONE_7_0) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_TV) && TARGET_OS_TV) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S +# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && defined(__WATCHOS_2_0) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_VISION) && TARGET_OS_VISION) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S # else -# define APPLE_HAS_MEMSET_S 0 +# undef APPLE_HAS_MEMSET_S # endif #endif @@ -55,7 +55,7 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len) { #ifdef _WIN32 SecureZeroMemory(dst, len_); - #elif defined(__APPLE__) && defined(__MACH__) && APPLE_HAS_MEMSET_S + #elif defined(__APPLE__) && defined(__MACH__) && defined(APPLE_HAS_MEMSET_S) memset_s(dst, len_, 0, len_); #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) || defined(__OpenBSD__) explicit_bzero(dst, len_); diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index d4a90220beafb5..f85982f33735d3 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -257,11 +257,11 @@ FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { FStar_UInt128_uint128 lit; lit.low = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); lit.high = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); return lit; } @@ -294,14 +294,12 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); lit.high = - ((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; return lit; } @@ -315,28 +313,19 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); lit.high = - (x >> FStar_UInt128_u32_32) - * (y >> FStar_UInt128_u32_32) - + - (((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + - ((FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> FStar_UInt128_u32_32); return lit; } diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index d91650b44bb4e7..a6776282423b95 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=7720f6d4fc0468a99d5ea6120976bcc271e42727 +expected_hacl_star_rev=4ef25b547b377dcef855db4289c6a00580e7221c hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" From 73d71a416fb05b64c2b43fade5d781a1fa0cb2cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 16 May 2025 14:00:01 +0200 Subject: [PATCH 160/989] gh-132388: test HACL* and OpenSSL hash functions in pure Python HMAC (#134051) --- Lib/test/support/hashlib_helper.py | 221 ++++++++++++++++++++++++++++- Lib/test/test_hmac.py | 154 ++++++++++++-------- 2 files changed, 308 insertions(+), 67 deletions(-) diff --git a/Lib/test/support/hashlib_helper.py b/Lib/test/support/hashlib_helper.py index 5043f08dd93de6..7032257b06877a 100644 --- a/Lib/test/support/hashlib_helper.py +++ b/Lib/test/support/hashlib_helper.py @@ -23,6 +23,22 @@ def requires_builtin_hmac(): return unittest.skipIf(_hmac is None, "requires _hmac") +def _missing_hash(digestname, implementation=None, *, exc=None): + parts = ["missing", implementation, f"hash algorithm: {digestname!r}"] + msg = " ".join(filter(None, parts)) + raise unittest.SkipTest(msg) from exc + + +def _openssl_availabillity(digestname, *, usedforsecurity): + try: + _hashlib.new(digestname, usedforsecurity=usedforsecurity) + except AttributeError: + assert _hashlib is None + _missing_hash(digestname, "OpenSSL") + except ValueError as exc: + _missing_hash(digestname, "OpenSSL", exc=exc) + + def _decorate_func_or_class(func_or_class, decorator_func): if not isinstance(func_or_class, type): return decorator_func(func_or_class) @@ -71,8 +87,7 @@ def wrapper(*args, **kwargs): try: test_availability() except ValueError as exc: - msg = f"missing hash algorithm: {digestname!r}" - raise unittest.SkipTest(msg) from exc + _missing_hash(digestname, exc=exc) return func(*args, **kwargs) return wrapper @@ -87,14 +102,44 @@ def requires_openssl_hashdigest(digestname, *, usedforsecurity=True): The hashing algorithm may be missing or blocked by a strict crypto policy. """ def decorator_func(func): - @requires_hashlib() + @requires_hashlib() # avoid checking at each call @functools.wraps(func) def wrapper(*args, **kwargs): + _openssl_availabillity(digestname, usedforsecurity=usedforsecurity) + return func(*args, **kwargs) + return wrapper + + def decorator(func_or_class): + return _decorate_func_or_class(func_or_class, decorator_func) + return decorator + + +def find_openssl_hashdigest_constructor(digestname, *, usedforsecurity=True): + """Find the OpenSSL hash function constructor by its name.""" + assert isinstance(digestname, str), digestname + _openssl_availabillity(digestname, usedforsecurity=usedforsecurity) + # This returns a function of the form _hashlib.openssl_ and + # not a lambda function as it is rejected by _hashlib.hmac_new(). + return getattr(_hashlib, f"openssl_{digestname}") + + +def requires_builtin_hashdigest( + module_name, digestname, *, usedforsecurity=True +): + """Decorator raising SkipTest if a HACL* hashing algorithm is missing. + + - The *module_name* is the C extension module name based on HACL*. + - The *digestname* is one of its member, e.g., 'md5'. + """ + def decorator_func(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + module = import_module(module_name) try: - _hashlib.new(digestname, usedforsecurity=usedforsecurity) - except ValueError: - msg = f"missing OpenSSL hash algorithm: {digestname!r}" - raise unittest.SkipTest(msg) + getattr(module, digestname) + except AttributeError: + fullname = f'{module_name}.{digestname}' + _missing_hash(fullname, implementation="HACL") return func(*args, **kwargs) return wrapper @@ -103,6 +148,168 @@ def decorator(func_or_class): return decorator +def find_builtin_hashdigest_constructor( + module_name, digestname, *, usedforsecurity=True +): + """Find the HACL* hash function constructor. + + - The *module_name* is the C extension module name based on HACL*. + - The *digestname* is one of its member, e.g., 'md5'. + """ + module = import_module(module_name) + try: + constructor = getattr(module, digestname) + constructor(b'', usedforsecurity=usedforsecurity) + except (AttributeError, TypeError, ValueError): + _missing_hash(f'{module_name}.{digestname}', implementation="HACL") + return constructor + + +class HashFunctionsTrait: + """Mixin trait class containing hash functions. + + This class is assumed to have all unitest.TestCase methods but should + not directly inherit from it to prevent the test suite being run on it. + + Subclasses should implement the hash functions by returning an object + that can be recognized as a valid digestmod parameter for both hashlib + and HMAC. In particular, it cannot be a lambda function as it will not + be recognized by hashlib (it will still be accepted by the pure Python + implementation of HMAC). + """ + + ALGORITHMS = [ + 'md5', 'sha1', + 'sha224', 'sha256', 'sha384', 'sha512', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + ] + + # Default 'usedforsecurity' to use when looking up a hash function. + usedforsecurity = True + + def _find_constructor(self, name): + # By default, a missing algorithm skips the test that uses it. + self.assertIn(name, self.ALGORITHMS) + self.skipTest(f"missing hash function: {name}") + + @property + def md5(self): + return self._find_constructor("md5") + + @property + def sha1(self): + return self._find_constructor("sha1") + + @property + def sha224(self): + return self._find_constructor("sha224") + + @property + def sha256(self): + return self._find_constructor("sha256") + + @property + def sha384(self): + return self._find_constructor("sha384") + + @property + def sha512(self): + return self._find_constructor("sha512") + + @property + def sha3_224(self): + return self._find_constructor("sha3_224") + + @property + def sha3_256(self): + return self._find_constructor("sha3_256") + + @property + def sha3_384(self): + return self._find_constructor("sha3_384") + + @property + def sha3_512(self): + return self._find_constructor("sha3_512") + + +class NamedHashFunctionsTrait(HashFunctionsTrait): + """Trait containing named hash functions. + + Hash functions are available if and only if they are available in hashlib. + """ + + def _find_constructor(self, name): + self.assertIn(name, self.ALGORITHMS) + return name + + +class OpenSSLHashFunctionsTrait(HashFunctionsTrait): + """Trait containing OpenSSL hash functions. + + Hash functions are available if and only if they are available in _hashlib. + """ + + def _find_constructor(self, name): + self.assertIn(name, self.ALGORITHMS) + return find_openssl_hashdigest_constructor( + name, usedforsecurity=self.usedforsecurity + ) + + +class BuiltinHashFunctionsTrait(HashFunctionsTrait): + """Trait containing HACL* hash functions. + + Hash functions are available if and only if they are available in C. + In particular, HACL* HMAC-MD5 may be available even though HACL* md5 + is not since the former is unconditionally built. + """ + + def _find_constructor_in(self, module, name): + self.assertIn(name, self.ALGORITHMS) + return find_builtin_hashdigest_constructor(module, name) + + @property + def md5(self): + return self._find_constructor_in("_md5", "md5") + + @property + def sha1(self): + return self._find_constructor_in("_sha1", "sha1") + + @property + def sha224(self): + return self._find_constructor_in("_sha2", "sha224") + + @property + def sha256(self): + return self._find_constructor_in("_sha2", "sha256") + + @property + def sha384(self): + return self._find_constructor_in("_sha2", "sha384") + + @property + def sha512(self): + return self._find_constructor_in("_sha2", "sha512") + + @property + def sha3_224(self): + return self._find_constructor_in("_sha3", "sha3_224") + + @property + def sha3_256(self): + return self._find_constructor_in("_sha3","sha3_256") + + @property + def sha3_384(self): + return self._find_constructor_in("_sha3","sha3_384") + + @property + def sha3_512(self): + return self._find_constructor_in("_sha3","sha3_512") + + def find_gil_minsize(modules_names, default=2048): """Get the largest GIL_MINSIZE value for the given cryptographic modules. diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index 70c7943772249e..e898644dd8a552 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -1,8 +1,27 @@ +"""Test suite for HMAC. + +Python provides three different implementations of HMAC: + +- OpenSSL HMAC using OpenSSL hash functions. +- HACL* HMAC using HACL* hash functions. +- Generic Python HMAC using user-defined hash functions. + +The generic Python HMAC implementation is able to use OpenSSL +callables or names, HACL* named hash functions or arbitrary +objects implementing PEP 247 interface. + +In the two first cases, Python HMAC wraps a C HMAC object (either OpenSSL +or HACL*-based). As a last resort, HMAC is re-implemented in pure Python. +It is however interesting to test the pure Python implementation against +the OpenSSL and HACL* hash functions. +""" + import binascii import functools import hmac import hashlib import random +import test.support import test.support.hashlib_helper as hashlib_helper import types import unittest @@ -10,6 +29,12 @@ import warnings from _operator import _compare_digest as operator_compare_digest from test.support import check_disallow_instantiation +from test.support.hashlib_helper import ( + BuiltinHashFunctionsTrait, + HashFunctionsTrait, + NamedHashFunctionsTrait, + OpenSSLHashFunctionsTrait, +) from test.support.import_helper import import_fresh_module, import_module try: @@ -382,50 +407,7 @@ class BuiltinAssertersMixin(ThroughBuiltinAPIMixin, AssertersMixin): pass -class HashFunctionsTrait: - """Trait class for 'hashfunc' in hmac_new() and hmac_digest().""" - - ALGORITHMS = [ - 'md5', 'sha1', - 'sha224', 'sha256', 'sha384', 'sha512', - 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', - ] - - # By default, a missing algorithm skips the test that uses it. - _ = property(lambda self: self.skipTest("missing hash function")) - md5 = sha1 = _ - sha224 = sha256 = sha384 = sha512 = _ - sha3_224 = sha3_256 = sha3_384 = sha3_512 = _ - del _ - - -class WithOpenSSLHashFunctions(HashFunctionsTrait): - """Test a HMAC implementation with an OpenSSL-based callable 'hashfunc'.""" - - @classmethod - def setUpClass(cls): - super().setUpClass() - - for name in cls.ALGORITHMS: - @property - @hashlib_helper.requires_openssl_hashdigest(name) - def func(self, *, __name=name): # __name needed to bind 'name' - return getattr(_hashlib, f'openssl_{__name}') - setattr(cls, name, func) - - -class WithNamedHashFunctions(HashFunctionsTrait): - """Test a HMAC implementation with a named 'hashfunc'.""" - - @classmethod - def setUpClass(cls): - super().setUpClass() - - for name in cls.ALGORITHMS: - setattr(cls, name, name) - - -class RFCTestCaseMixin(AssertersMixin, HashFunctionsTrait): +class RFCTestCaseMixin(HashFunctionsTrait, AssertersMixin): """Test HMAC implementations against RFC 2202/4231 and NIST test vectors. - Test vectors for MD5 and SHA-1 are taken from RFC 2202. @@ -739,26 +721,83 @@ def test_sha3_512_nist(self): ) -class PyRFCTestCase(ThroughObjectMixin, PyAssertersMixin, - WithOpenSSLHashFunctions, RFCTestCaseMixin, - unittest.TestCase): +class PurePythonInitHMAC(PyModuleMixin, HashFunctionsTrait): + + @classmethod + def setUpClass(cls): + super().setUpClass() + for meth in ['_init_openssl_hmac', '_init_builtin_hmac']: + fn = getattr(cls.hmac.HMAC, meth) + cm = mock.patch.object(cls.hmac.HMAC, meth, autospec=True, wraps=fn) + cls.enterClassContext(cm) + + @classmethod + def tearDownClass(cls): + cls.hmac.HMAC._init_openssl_hmac.assert_not_called() + cls.hmac.HMAC._init_builtin_hmac.assert_not_called() + # Do not assert that HMAC._init_old() has been called as it's tricky + # to determine whether a test for a specific hash function has been + # executed or not. On regular builds, it will be called but if a + # hash function is not available, it's hard to detect for which + # test we should checj HMAC._init_old() or not. + super().tearDownClass() + + +class PyRFCOpenSSLTestCase(ThroughObjectMixin, + PyAssertersMixin, + OpenSSLHashFunctionsTrait, + RFCTestCaseMixin, + PurePythonInitHMAC, + unittest.TestCase): """Python implementation of HMAC using hmac.HMAC(). - The underlying hash functions are OpenSSL-based. + The underlying hash functions are OpenSSL-based but + _init_old() is used instead of _init_openssl_hmac(). """ -class PyDotNewRFCTestCase(ThroughModuleAPIMixin, PyAssertersMixin, - WithOpenSSLHashFunctions, RFCTestCaseMixin, - unittest.TestCase): +class PyRFCBuiltinTestCase(ThroughObjectMixin, + PyAssertersMixin, + BuiltinHashFunctionsTrait, + RFCTestCaseMixin, + PurePythonInitHMAC, + unittest.TestCase): + """Python implementation of HMAC using hmac.HMAC(). + + The underlying hash functions are HACL*-based but + _init_old() is used instead of _init_builtin_hmac(). + """ + + +class PyDotNewOpenSSLRFCTestCase(ThroughModuleAPIMixin, + PyAssertersMixin, + OpenSSLHashFunctionsTrait, + RFCTestCaseMixin, + PurePythonInitHMAC, + unittest.TestCase): + """Python implementation of HMAC using hmac.new(). + + The underlying hash functions are OpenSSL-based but + _init_old() is used instead of _init_openssl_hmac(). + """ + + +class PyDotNewBuiltinRFCTestCase(ThroughModuleAPIMixin, + PyAssertersMixin, + BuiltinHashFunctionsTrait, + RFCTestCaseMixin, + PurePythonInitHMAC, + unittest.TestCase): """Python implementation of HMAC using hmac.new(). - The underlying hash functions are OpenSSL-based. + The underlying hash functions are HACL-based but + _init_old() is used instead of _init_openssl_hmac(). """ class OpenSSLRFCTestCase(OpenSSLAssertersMixin, - WithOpenSSLHashFunctions, RFCTestCaseMixin, + OpenSSLHashFunctionsTrait, + RFCTestCaseMixin, unittest.TestCase): """OpenSSL implementation of HMAC. @@ -767,7 +806,8 @@ class OpenSSLRFCTestCase(OpenSSLAssertersMixin, class BuiltinRFCTestCase(BuiltinAssertersMixin, - WithNamedHashFunctions, RFCTestCaseMixin, + NamedHashFunctionsTrait, + RFCTestCaseMixin, unittest.TestCase): """Built-in HACL* implementation of HMAC. @@ -784,12 +824,6 @@ def assert_hmac_extra_cases( self.check_hmac_hexdigest(key, msg, hexdigest, digest_size, func) -# TODO(picnixz): once we have a HACL* HMAC, we should also test the Python -# implementation of HMAC with a HACL*-based hash function. For now, we only -# test it partially via the '_sha2' module, but for completeness we could -# also test the RFC test vectors against all possible implementations. - - class DigestModTestCaseMixin(CreatorMixin, DigestMixin): """Tests for the 'digestmod' parameter for hmac_new() and hmac_digest().""" From 62f66caa8c963bdf45d1e22456aea985e74fa2d5 Mon Sep 17 00:00:00 2001 From: Semyon Moroz Date: Fri, 16 May 2025 12:17:29 +0000 Subject: [PATCH 161/989] gh-124210: Add introduction to `threading` docs (#127046) Co-authored-by: Peter Bierma --- Doc/library/threading.rst | 114 +++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 249c0a5cb035c3..7edcdcabdce1df 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -11,6 +11,52 @@ This module constructs higher-level threading interfaces on top of the lower level :mod:`_thread` module. +.. include:: ../includes/wasm-notavail.rst + +Introduction +------------ + +The :mod:`!threading` module provides a way to run multiple `threads +`_ (smaller +units of a process) concurrently within a single process. It allows for the +creation and management of threads, making it possible to execute tasks in +parallel, sharing memory space. Threads are particularly useful when tasks are +I/O bound, such as file operations or making network requests, +where much of the time is spent waiting for external resources. + +A typical use case for :mod:`!threading` includes managing a pool of worker +threads that can process multiple tasks concurrently. Here's a basic example of +creating and starting threads using :class:`~threading.Thread`:: + + import threading + import time + + def crawl(link, delay=3): + print(f"crawl started for {link}") + time.sleep(delay) # Blocking I/O (simulating a network request) + print(f"crawl ended for {link}") + + links = [ + "https://python.org", + "https://docs.python.org", + "https://peps.python.org", + ] + + # Start threads for each link + threads = [] + for link in links: + # Using `args` to pass positional arguments and `kwargs` for keyword arguments + t = threading.Thread(target=crawl, args=(link,), kwargs={"delay": 2}) + threads.append(t) + + # Start each thread + for t in threads: + t.start() + + # Wait for all threads to finish + for t in threads: + t.join() + .. versionchanged:: 3.7 This module used to be optional, it is now always available. @@ -45,7 +91,25 @@ level :mod:`_thread` module. However, threading is still an appropriate model if you want to run multiple I/O-bound tasks simultaneously. -.. include:: ../includes/wasm-notavail.rst +GIL and performance considerations +---------------------------------- + +Unlike the :mod:`multiprocessing` module, which uses separate processes to +bypass the :term:`global interpreter lock` (GIL), the threading module operates +within a single process, meaning that all threads share the same memory space. +However, the GIL limits the performance gains of threading when it comes to +CPU-bound tasks, as only one thread can execute Python bytecode at a time. +Despite this, threads remain a useful tool for achieving concurrency in many +scenarios. + +As of Python 3.13, experimental :term:`free-threaded ` builds +can disable the GIL, enabling true parallel execution of threads, but this +feature is not available by default (see :pep:`703`). + +.. TODO: At some point this feature will become available by default. + +Reference +--------- This module defines the following functions: @@ -62,7 +126,7 @@ This module defines the following functions: Return the current :class:`Thread` object, corresponding to the caller's thread of control. If the caller's thread of control was not created through the - :mod:`threading` module, a dummy thread object with limited functionality is + :mod:`!threading` module, a dummy thread object with limited functionality is returned. The function ``currentThread`` is a deprecated alias for this function. @@ -157,13 +221,13 @@ This module defines the following functions: .. index:: single: trace function - Set a trace function for all threads started from the :mod:`threading` module. + Set a trace function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.settrace` for each thread, before its :meth:`~Thread.run` method is called. .. function:: settrace_all_threads(func) - Set a trace function for all threads started from the :mod:`threading` module + Set a trace function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.settrace` for each thread, before its @@ -186,13 +250,13 @@ This module defines the following functions: .. index:: single: profile function - Set a profile function for all threads started from the :mod:`threading` module. + Set a profile function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.setprofile` for each thread, before its :meth:`~Thread.run` method is called. .. function:: setprofile_all_threads(func) - Set a profile function for all threads started from the :mod:`threading` module + Set a profile function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.setprofile` for each thread, before its @@ -257,8 +321,8 @@ when implemented, are mapped to module-level functions. All of the methods described below are executed atomically. -Thread-Local Data ------------------ +Thread-local data +^^^^^^^^^^^^^^^^^ Thread-local data is data whose values are thread specific. If you have data that you want to be local to a thread, create a @@ -389,8 +453,8 @@ affects what we see:: .. _thread-objects: -Thread Objects --------------- +Thread objects +^^^^^^^^^^^^^^ The :class:`Thread` class represents an activity that is run in a separate thread of control. There are two ways to specify the activity: by passing a @@ -645,8 +709,8 @@ since it is impossible to detect the termination of alien threads. .. _lock-objects: -Lock Objects ------------- +Lock objects +^^^^^^^^^^^^ A primitive lock is a synchronization primitive that is not owned by a particular thread when locked. In Python, it is currently the lowest level @@ -738,8 +802,8 @@ All methods are executed atomically. .. _rlock-objects: -RLock Objects -------------- +RLock objects +^^^^^^^^^^^^^ A reentrant lock is a synchronization primitive that may be acquired multiple times by the same thread. Internally, it uses the concepts of "owning thread" @@ -848,8 +912,8 @@ call release as many times the lock has been acquired can lead to deadlock. .. _condition-objects: -Condition Objects ------------------ +Condition objects +^^^^^^^^^^^^^^^^^ A condition variable is always associated with some kind of lock; this can be passed in or one will be created by default. Passing one in is useful when @@ -1026,8 +1090,8 @@ item to the buffer only needs to wake up one consumer thread. .. _semaphore-objects: -Semaphore Objects ------------------ +Semaphore objects +^^^^^^^^^^^^^^^^^ This is one of the oldest synchronization primitives in the history of computer science, invented by the early Dutch computer scientist Edsger W. Dijkstra (he @@ -1107,7 +1171,7 @@ Semaphores also support the :ref:`context management protocol `. .. _semaphore-examples: -:class:`Semaphore` Example +:class:`Semaphore` example ^^^^^^^^^^^^^^^^^^^^^^^^^^ Semaphores are often used to guard resources with limited capacity, for example, @@ -1135,8 +1199,8 @@ causes the semaphore to be released more than it's acquired will go undetected. .. _event-objects: -Event Objects -------------- +Event objects +^^^^^^^^^^^^^ This is one of the simplest mechanisms for communication between threads: one thread signals an event and other threads wait for it. @@ -1192,8 +1256,8 @@ method. The :meth:`~Event.wait` method blocks until the flag is true. .. _timer-objects: -Timer Objects -------------- +Timer objects +^^^^^^^^^^^^^ This class represents an action that should be run only after a certain amount of time has passed --- a timer. :class:`Timer` is a subclass of :class:`Thread` @@ -1230,8 +1294,8 @@ For example:: only work if the timer is still in its waiting stage. -Barrier Objects ---------------- +Barrier objects +^^^^^^^^^^^^^^^ .. versionadded:: 3.2 From 7a4a6cf2b8d48b302c734a5cee59f42973c8fd36 Mon Sep 17 00:00:00 2001 From: Alexey Makridenko Date: Fri, 16 May 2025 16:17:54 +0200 Subject: [PATCH 162/989] gh-133604: remove deprecated `java_ver` function (#133888) --- Doc/deprecations/pending-removal-in-3.15.rst | 2 +- Doc/library/platform.rst | 18 ----- Doc/whatsnew/3.13.rst | 2 +- Doc/whatsnew/3.15.rst | 8 +++ Lib/platform.py | 67 +------------------ Lib/test/test_platform.py | 9 --- ...-05-11-12-56-52.gh-issue-133604.kFxhc8.rst | 1 + 7 files changed, 13 insertions(+), 94 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-11-12-56-52.gh-issue-133604.kFxhc8.rst diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 8953aedf989869..707253a91ecd40 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -51,7 +51,7 @@ Pending removal in Python 3.15 * :mod:`platform`: - * :func:`~platform.java_ver` has been deprecated since Python 3.13. + * :func:`!platform.java_ver` has been deprecated since Python 3.13. This function is only useful for Jython support, has a confusing API, and is largely untested. diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index 5c999054323be5..06de152a742f28 100644 --- a/Doc/library/platform.rst +++ b/Doc/library/platform.rst @@ -188,24 +188,6 @@ Cross platform :attr:`processor` is resolved late instead of immediately. -Java platform -------------- - - -.. function:: java_ver(release='', vendor='', vminfo=('','',''), osinfo=('','','')) - - Version interface for Jython. - - Returns a tuple ``(release, vendor, vminfo, osinfo)`` with *vminfo* being a - tuple ``(vm_name, vm_release, vm_vendor)`` and *osinfo* being a tuple - ``(os_name, os_version, os_arch)``. Values which cannot be determined are set to - the defaults given as parameters (which all default to ``''``). - - .. deprecated-removed:: 3.13 3.15 - It was largely untested, had a confusing API, - and was only useful for Jython support. - - Windows platform ---------------- diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 7ae184058111d8..e64eb19bddb522 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1908,7 +1908,7 @@ New Deprecations * :mod:`platform`: - * Deprecate :func:`~platform.java_ver`, + * Deprecate :func:`!platform.java_ver`, to be removed in Python 3.15. This function is only useful for Jython support, has a confusing API, and is largely untested. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 14d96420f698fa..9e9a168db0e725 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -121,6 +121,14 @@ Deprecated Removed ======= +platform +-------- + +* Removed the :func:`!platform.java_ver` function, + which was deprecated since Python 3.13. + (Contributed by Alexey Makridenko in :gh:`133604`.) + + sysconfig --------- diff --git a/Lib/platform.py b/Lib/platform.py index 55e211212d4209..077db81264a57c 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -29,7 +29,7 @@ # # History: # -# +# # # 1.0.9 - added invalidate_caches() function to invalidate cached values # 1.0.8 - changed Windows support to read version from kernel32.dll @@ -110,7 +110,7 @@ """ -__version__ = '1.0.9' +__version__ = '1.1.0' import collections import os @@ -528,53 +528,6 @@ def ios_ver(system="", release="", model="", is_simulator=False): return IOSVersionInfo(system, release, model, is_simulator) -def _java_getprop(name, default): - """This private helper is deprecated in 3.13 and will be removed in 3.15""" - from java.lang import System - try: - value = System.getProperty(name) - if value is None: - return default - return value - except AttributeError: - return default - -def java_ver(release='', vendor='', vminfo=('', '', ''), osinfo=('', '', '')): - - """ Version interface for Jython. - - Returns a tuple (release, vendor, vminfo, osinfo) with vminfo being - a tuple (vm_name, vm_release, vm_vendor) and osinfo being a - tuple (os_name, os_version, os_arch). - - Values which cannot be determined are set to the defaults - given as parameters (which all default to ''). - - """ - import warnings - warnings._deprecated('java_ver', remove=(3, 15)) - # Import the needed APIs - try: - import java.lang # noqa: F401 - except ImportError: - return release, vendor, vminfo, osinfo - - vendor = _java_getprop('java.vendor', vendor) - release = _java_getprop('java.version', release) - vm_name, vm_release, vm_vendor = vminfo - vm_name = _java_getprop('java.vm.name', vm_name) - vm_vendor = _java_getprop('java.vm.vendor', vm_vendor) - vm_release = _java_getprop('java.vm.version', vm_release) - vminfo = vm_name, vm_release, vm_vendor - os_name, os_version, os_arch = osinfo - os_arch = _java_getprop('java.os.arch', os_arch) - os_name = _java_getprop('java.os.name', os_name) - os_version = _java_getprop('java.os.version', os_version) - osinfo = os_name, os_version, os_arch - - return release, vendor, vminfo, osinfo - - AndroidVer = collections.namedtuple( "AndroidVer", "release api_level manufacturer model device is_emulator") @@ -1034,13 +987,6 @@ def uname(): version = '16bit' system = 'Windows' - elif system[:4] == 'java': - release, vendor, vminfo, osinfo = java_ver() - system = 'Java' - version = ', '.join(vminfo) - if not version: - version = vendor - # System specific extensions if system == 'OpenVMS': # OpenVMS seems to have release and version mixed up @@ -1370,15 +1316,6 @@ def platform(aliased=False, terse=False): platform = _platform(system, release, machine, processor, 'with', libcname+libcversion) - elif system == 'Java': - # Java platforms - r, v, vminfo, (os_name, os_version, os_arch) = java_ver() - if terse or not os_name: - platform = _platform(system, release, version) - else: - platform = _platform(system, release, version, - 'on', - os_name, os_version, os_arch) else: # Generic handler diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 818e807dd3a6fb..3b673a47c8c137 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -383,15 +383,6 @@ def raises_oserror(*a): finally: platform._uname_cache = None - def test_java_ver(self): - import re - msg = re.escape( - "'java_ver' is deprecated and slated for removal in Python 3.15" - ) - with self.assertWarnsRegex(DeprecationWarning, msg): - res = platform.java_ver() - self.assertEqual(len(res), 4) - @unittest.skipUnless(support.MS_WINDOWS, 'This test only makes sense on Windows') def test_win32_ver(self): release1, version1, csd1, ptype1 = 'a', 'b', 'c', 'd' diff --git a/Misc/NEWS.d/next/Library/2025-05-11-12-56-52.gh-issue-133604.kFxhc8.rst b/Misc/NEWS.d/next/Library/2025-05-11-12-56-52.gh-issue-133604.kFxhc8.rst new file mode 100644 index 00000000000000..526ac38f09bab3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-11-12-56-52.gh-issue-133604.kFxhc8.rst @@ -0,0 +1 @@ +Remove :func:`!platform.java_ver` which was deprecated since Python 3.13. From d94b1e9cac82143048031530e6c51e59f597bccd Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 16 May 2025 21:01:15 +0530 Subject: [PATCH 163/989] gh-133515: fix docs for unawaited coroutines in debug mode (#134081) --- Doc/library/asyncio-dev.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index 44b507a9811116..7831b613bd4a60 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -46,10 +46,6 @@ In addition to enabling the debug mode, consider also: When the debug mode is enabled: -* asyncio checks for :ref:`coroutines that were not awaited - ` and logs them; this mitigates - the "forgotten await" pitfall. - * Many non-threadsafe asyncio APIs (such as :meth:`loop.call_soon` and :meth:`loop.call_at` methods) raise an exception if they are called from a wrong thread. From b5febf73b9b049822994b21b8f4ee9443aa2e777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 16 May 2025 20:36:48 +0200 Subject: [PATCH 164/989] gh-134082: modernize docstrings in `string.Formatter` (#134083) --- Lib/string/__init__.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/Lib/string/__init__.py b/Lib/string/__init__.py index eab5067c9b133e..b7782e042b94eb 100644 --- a/Lib/string/__init__.py +++ b/Lib/string/__init__.py @@ -264,22 +264,18 @@ def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, return ''.join(result), auto_arg_index - def get_value(self, key, args, kwargs): if isinstance(key, int): return args[key] else: return kwargs[key] - def check_unused_args(self, used_args, args, kwargs): pass - def format_field(self, value, format_spec): return format(value, format_spec) - def convert_field(self, value, conversion): # do any conversion on the resulting object if conversion is None: @@ -292,28 +288,27 @@ def convert_field(self, value, conversion): return ascii(value) raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) - - # returns an iterable that contains tuples of the form: - # (literal_text, field_name, format_spec, conversion) - # literal_text can be zero length - # field_name can be None, in which case there's no - # object to format and output - # if field_name is not None, it is looked up, formatted - # with format_spec and conversion and then used def parse(self, format_string): - return _string.formatter_parser(format_string) + """ + Return an iterable that contains tuples of the form + (literal_text, field_name, format_spec, conversion). - # given a field_name, find the object it references. - # field_name: the field being looked up, e.g. "0.name" - # or "lookup[3]" - # used_args: a set of which args have been used - # args, kwargs: as passed in to vformat + *field_name* can be None, in which case there's no object + to format and output; otherwise, it is looked up and + formatted with *format_spec* and *conversion*. + """ + return _string.formatter_parser(format_string) + def get_field(self, field_name, args, kwargs): - first, rest = _string.formatter_field_name_split(field_name) + """Find the object referenced by a given field name. + The field name *field_name* can be for instance "0.name" + or "lookup[3]". The *args* and *kwargs* arguments are + passed to get_value(). + """ + first, rest = _string.formatter_field_name_split(field_name) obj = self.get_value(first, args, kwargs) - # loop through the rest of the field_name, doing # getattr or getitem as needed for is_attr, i in rest: @@ -321,5 +316,4 @@ def get_field(self, field_name, args, kwargs): obj = getattr(obj, i) else: obj = obj[i] - return obj, first From ac8df4b5892d2e4bd99731e7d87223a35c238f81 Mon Sep 17 00:00:00 2001 From: alexey semenyuk Date: Fri, 16 May 2025 23:42:06 +0500 Subject: [PATCH 165/989] gh-133286: add explanation about `seq` for pathlib Pattern Language (#133340) --- Doc/library/pathlib.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7d7692dea5c38c..86351e65dc4ed6 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1781,9 +1781,12 @@ The following wildcards are supported in patterns for ``?`` Matches one non-separator character. ``[seq]`` - Matches one character in *seq*. + Matches one character in *seq*, where *seq* is a sequence of characters. + Range expressions are supported; for example, ``[a-z]`` matches any lowercase ASCII letter. + Multiple ranges can be combined: ``[a-zA-Z0-9_]`` matches any ASCII letter, digit, or underscore. + ``[!seq]`` - Matches one character not in *seq*. + Matches one character not in *seq*, where *seq* follows the same rules as above. For a literal match, wrap the meta-characters in brackets. For example, ``"[?]"`` matches the character ``"?"``. From ea2d707bd59963bd4f53407108026930ff12ae56 Mon Sep 17 00:00:00 2001 From: Saleh Dehqanpour Date: Fri, 16 May 2025 22:42:40 +0330 Subject: [PATCH 166/989] gh-117026: Remove outdated sentence in SimpleHTTPRequestHandler docs (GH-117027) The code was changed in 0f7cddc308b297e6a1c2dd61503acea38401656f (bpo-839496/gh-39531). --- Doc/library/http.server.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 54df4a7e804d50..02016c789b24b4 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -429,8 +429,7 @@ instantiation, of which this module provides three different variants: ``'Last-Modified:'`` header with the file's modification time. Then follows a blank line signifying the end of the headers, and then the - contents of the file are output. If the file's MIME type starts with - ``text/`` the file is opened in text mode; otherwise binary mode is used. + contents of the file are output. For example usage, see the implementation of the ``test`` function in :source:`Lib/http/server.py`. From 71cf4dd622832848cace358a7f8444243afd2e83 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 16 May 2025 23:29:14 +0300 Subject: [PATCH 167/989] gh-134109: Fix showing comments in pydoc output for argparse (GH-134110) Comments immediately preceding the object's source code are used if the object has no docstring. Comments that do not describe the object should be separated from the following source code by an empty line. --- Lib/argparse.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Lib/argparse.py b/Lib/argparse.py index f688c38d0d1ae5..d1a6350c3fda6d 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -205,6 +205,7 @@ def _set_color(self, color): # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -256,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -299,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -1467,6 +1470,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1477,6 +1481,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1496,6 +1501,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1921,6 +1927,7 @@ def identity(string): # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1935,6 +1942,7 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: raise ValueError('cannot have multiple subparser arguments') @@ -1988,6 +1996,7 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: @@ -2582,6 +2591,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # ======================== # Value conversion methods # ======================== + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2681,6 +2691,7 @@ def _check_value(self, action, value): # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2718,6 +2729,7 @@ def _get_formatter(self): # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2739,6 +2751,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) From fcaf009907fc39d604907315155c1f1de811dd88 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 May 2025 10:00:56 +0300 Subject: [PATCH 168/989] gh-133889: Improve tests for SimpleHTTPRequestHandler (GH-134102) --- Lib/test/test_httpservers.py | 140 ++++++++++++++++++++++++----------- 1 file changed, 98 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 2cafa4e45a1313..4cb78c87ea7670 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -522,34 +522,111 @@ def close_conn(): reader.close() return body + def check_list_dir_dirname(self, dirname, quotedname=None): + fullpath = os.path.join(self.tempdir, dirname) + try: + os.mkdir(os.path.join(self.tempdir, dirname)) + except (OSError, UnicodeEncodeError): + self.skipTest(f'Can not create directory {dirname!a} ' + f'on current file system') + + if quotedname is None: + quotedname = urllib.parse.quote(dirname, errors='surrogatepass') + response = self.request(self.base_url + '/' + quotedname + '/') + body = self.check_status_and_reason(response, HTTPStatus.OK) + displaypath = html.escape(f'{self.base_url}/{dirname}/', quote=False) + enc = sys.getfilesystemencoding() + prefix = f'listing for {displaypath}', body) + self.assertIn(prefix + b'h1>', body) + + def check_list_dir_filename(self, filename): + fullpath = os.path.join(self.tempdir, filename) + content = ascii(fullpath).encode() + (os_helper.TESTFN_UNDECODABLE or b'\xff') + try: + with open(fullpath, 'wb') as f: + f.write(content) + except OSError: + self.skipTest(f'Can not create file {filename!a} ' + f'on current file system') + + response = self.request(self.base_url + '/') + body = self.check_status_and_reason(response, HTTPStatus.OK) + quotedname = urllib.parse.quote(filename, errors='surrogatepass') + enc = response.headers.get_content_charset() + self.assertIsNotNone(enc) + self.assertIn((f'href="{quotedname}"').encode('ascii'), body) + displayname = html.escape(filename, quote=False) + self.assertIn(f'>{displayname}<'.encode(enc, 'surrogateescape'), body) + + response = self.request(self.base_url + '/' + quotedname) + self.check_status_and_reason(response, HTTPStatus.OK, data=content) + + @unittest.skipUnless(os_helper.TESTFN_NONASCII, + 'need os_helper.TESTFN_NONASCII') + def test_list_dir_nonascii_dirname(self): + dirname = os_helper.TESTFN_NONASCII + '.dir' + self.check_list_dir_dirname(dirname) + + @unittest.skipUnless(os_helper.TESTFN_NONASCII, + 'need os_helper.TESTFN_NONASCII') + def test_list_dir_nonascii_filename(self): + filename = os_helper.TESTFN_NONASCII + '.txt' + self.check_list_dir_filename(filename) + @unittest.skipIf(is_apple, 'undecodable name cannot always be decoded on Apple platforms') @unittest.skipIf(sys.platform == 'win32', 'undecodable name cannot be decoded on win32') @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, 'need os_helper.TESTFN_UNDECODABLE') - def test_undecodable_filename(self): - enc = sys.getfilesystemencoding() + def test_list_dir_undecodable_dirname(self): + dirname = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.dir' + self.check_list_dir_dirname(dirname) + + @unittest.skipIf(is_apple, + 'undecodable name cannot always be decoded on Apple platforms') + @unittest.skipIf(sys.platform == 'win32', + 'undecodable name cannot be decoded on win32') + @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, + 'need os_helper.TESTFN_UNDECODABLE') + def test_list_dir_undecodable_filename(self): filename = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.txt' - with open(os.path.join(self.tempdir, filename), 'wb') as f: - f.write(os_helper.TESTFN_UNDECODABLE) - response = self.request(self.base_url + '/') - if is_apple: - # On Apple platforms the HFS+ filesystem replaces bytes that - # aren't valid UTF-8 into a percent-encoded value. - for name in os.listdir(self.tempdir): - if name != 'test': # Ignore a filename created in setUp(). - filename = name - break - body = self.check_status_and_reason(response, HTTPStatus.OK) - quotedname = urllib.parse.quote(filename, errors='surrogatepass') - self.assertIn(('href="%s"' % quotedname) - .encode(enc, 'surrogateescape'), body) - self.assertIn(('>%s<' % html.escape(filename, quote=False)) - .encode(enc, 'surrogateescape'), body) - response = self.request(self.base_url + '/' + quotedname) - self.check_status_and_reason(response, HTTPStatus.OK, - data=os_helper.TESTFN_UNDECODABLE) + self.check_list_dir_filename(filename) + + def test_list_dir_undecodable_dirname2(self): + dirname = '\ufffd.dir' + self.check_list_dir_dirname(dirname, quotedname='%ff.dir') + + @unittest.skipUnless(os_helper.TESTFN_UNENCODABLE, + 'need os_helper.TESTFN_UNENCODABLE') + def test_list_dir_unencodable_dirname(self): + dirname = os_helper.TESTFN_UNENCODABLE + '.dir' + self.check_list_dir_dirname(dirname) + + @unittest.skipUnless(os_helper.TESTFN_UNENCODABLE, + 'need os_helper.TESTFN_UNENCODABLE') + def test_list_dir_unencodable_filename(self): + filename = os_helper.TESTFN_UNENCODABLE + '.txt' + self.check_list_dir_filename(filename) + + def test_list_dir_escape_dirname(self): + # Characters that need special treating in URL or HTML. + for name in ('q?', 'f#', '&', '&', '', '"dq"', "'sq'", + '%A4', '%E2%82%AC'): + with self.subTest(name=name): + dirname = name + '.dir' + self.check_list_dir_dirname(dirname, + quotedname=urllib.parse.quote(dirname, safe='&<>\'"')) + + def test_list_dir_escape_filename(self): + # Characters that need special treating in URL or HTML. + for name in ('q?', 'f#', '&', '&', '', '"dq"', "'sq'", + '%A4', '%E2%82%AC'): + with self.subTest(name=name): + filename = name + '.txt' + self.check_list_dir_filename(filename) + os_helper.unlink(os.path.join(self.tempdir, filename)) def test_undecodable_parameter(self): # sanity check using a valid parameter @@ -731,27 +808,6 @@ def test_path_without_leading_slash(self): self.assertEqual(response.getheader("Location"), self.tempdir_name + "/?hi=1") - def test_html_escape_filename(self): - filename = '.txt' - fullpath = os.path.join(self.tempdir, filename) - - try: - open(fullpath, 'wb').close() - except OSError: - raise unittest.SkipTest('Can not create file %s on current file ' - 'system' % filename) - - try: - response = self.request(self.base_url + '/') - body = self.check_status_and_reason(response, HTTPStatus.OK) - enc = response.headers.get_content_charset() - finally: - os.unlink(fullpath) # avoid affecting test_undecodable_filename - - self.assertIsNotNone(enc) - html_text = '>%s<' % html.escape(filename, quote=False) - self.assertIn(html_text.encode(enc), body) - cgi_file1 = """\ #!%s From 2f1ecb3bc474a5895dce090cca7b8afe7b560040 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 May 2025 10:11:34 +0300 Subject: [PATCH 169/989] gh-134098: Fix handling %-encoded trailing slash in SimpleHTTPRequestHandler (GH-134099) --- Lib/http/server.py | 8 ++++---- Lib/test/test_httpservers.py | 11 +++++++++++ .../2025-05-16-20-10-25.gh-issue-134098.YyTkKr.rst | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-16-20-10-25.gh-issue-134098.YyTkKr.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index 64f766f9bc2c1b..8be1903743a9a2 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -750,7 +750,7 @@ def send_head(self): f = None if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) - if not parts.path.endswith('/'): + if not parts.path.endswith(('/', '%2f', '%2F')): # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) new_parts = (parts[0], parts[1], parts[2] + '/', @@ -890,14 +890,14 @@ def translate_path(self, path): """ # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] + path = path.split('#', 1)[0] + path = path.split('?', 1)[0] # Don't forget explicit trailing slash when normalizing. Issue17324 - trailing_slash = path.rstrip().endswith('/') try: path = urllib.parse.unquote(path, errors='surrogatepass') except UnicodeDecodeError: path = urllib.parse.unquote(path) + trailing_slash = path.endswith('/') path = posixpath.normpath(path) words = path.split('/') words = filter(None, words) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4cb78c87ea7670..557e698aa3481c 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -692,10 +692,19 @@ def test_get(self): # check for trailing "/" which should return 404. See Issue17324 response = self.request(self.base_url + '/test/') self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) + response = self.request(self.base_url + '/test%2f') + self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) + response = self.request(self.base_url + '/test%2F') + self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) response = self.request(self.base_url + '/') self.check_status_and_reason(response, HTTPStatus.OK) + response = self.request(self.base_url + '%2f') + self.check_status_and_reason(response, HTTPStatus.OK) + response = self.request(self.base_url + '%2F') + self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.base_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader("Location"), self.base_url + "/") self.assertEqual(response.getheader("Content-Length"), "0") response = self.request(self.base_url + '/?hi=2') self.check_status_and_reason(response, HTTPStatus.OK) @@ -801,6 +810,8 @@ def test_path_without_leading_slash(self): self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.tempdir_name) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader("Location"), + self.tempdir_name + "/") response = self.request(self.tempdir_name + '/?hi=2') self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.tempdir_name + '?hi=1') diff --git a/Misc/NEWS.d/next/Library/2025-05-16-20-10-25.gh-issue-134098.YyTkKr.rst b/Misc/NEWS.d/next/Library/2025-05-16-20-10-25.gh-issue-134098.YyTkKr.rst new file mode 100644 index 00000000000000..32eff5371c4f1e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-16-20-10-25.gh-issue-134098.YyTkKr.rst @@ -0,0 +1,2 @@ +Fix handling paths that end with a percent-encoded slash (``%2f`` or +``%2F``) in :class:`http.server.SimpleHTTPRequestHandler`. From faac627e47f72797f5b7a65134bf4cdce6575ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 17 May 2025 09:58:16 +0200 Subject: [PATCH 170/989] gh-133810: remove `http.server.CGIHTTPRequestHandler` and `--cgi` flag (#133811) The CGI HTTP request handler has been deprecated since Python 3.13. --- Doc/deprecations/pending-removal-in-3.15.rst | 2 +- Doc/library/http.server.rst | 67 ---- Doc/whatsnew/3.13.rst | 2 +- Doc/whatsnew/3.15.rst | 9 + Lib/_compat_pickle.py | 3 - Lib/http/client.py | 9 +- Lib/http/server.py | 352 +----------------- Lib/test/test_httpservers.py | 331 +--------------- Lib/wsgiref/handlers.py | 3 +- Misc/NEWS.d/3.13.0a1.rst | 2 +- ...-05-10-11-04-47.gh-issue-133810.03WhnK.rst | 3 + 11 files changed, 28 insertions(+), 755 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-10-11-04-47.gh-issue-133810.03WhnK.rst diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 707253a91ecd40..a76d06cce1278a 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -20,7 +20,7 @@ Pending removal in Python 3.15 * :mod:`http.server`: - * The obsolete and rarely used :class:`~http.server.CGIHTTPRequestHandler` + * The obsolete and rarely used :class:`!CGIHTTPRequestHandler` has been deprecated since Python 3.13. No direct replacement exists. *Anything* is better than CGI to interface diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 02016c789b24b4..063344e0284258 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -458,55 +458,6 @@ such as using different index file names by overriding the class attribute :attr:`index_pages`. -.. class:: CGIHTTPRequestHandler(request, client_address, server) - - This class is used to serve either files or output of CGI scripts from the - current directory and below. Note that mapping HTTP hierarchic structure to - local directory structure is exactly as in :class:`SimpleHTTPRequestHandler`. - - .. note:: - - CGI scripts run by the :class:`CGIHTTPRequestHandler` class cannot execute - redirects (HTTP code 302), because code 200 (script output follows) is - sent prior to execution of the CGI script. This pre-empts the status - code. - - The class will however, run the CGI script, instead of serving it as a file, - if it guesses it to be a CGI script. Only directory-based CGI are used --- - the other common server configuration is to treat special extensions as - denoting CGI scripts. - - The :func:`do_GET` and :func:`do_HEAD` functions are modified to run CGI scripts - and serve the output, instead of serving files, if the request leads to - somewhere below the ``cgi_directories`` path. - - The :class:`CGIHTTPRequestHandler` defines the following data member: - - .. attribute:: cgi_directories - - This defaults to ``['/cgi-bin', '/htbin']`` and describes directories to - treat as containing CGI scripts. - - The :class:`CGIHTTPRequestHandler` defines the following method: - - .. method:: do_POST() - - This method serves the ``'POST'`` request type, only allowed for CGI - scripts. Error 501, "Can only POST to CGI scripts", is output when trying - to POST to a non-CGI url. - - Note that CGI scripts will be run with UID of user nobody, for security - reasons. Problems with the CGI script will be translated to error 403. - - .. deprecated-removed:: 3.13 3.15 - - :class:`CGIHTTPRequestHandler` is being removed in 3.15. CGI has not - been considered a good way to do things for well over a decade. This code - has been unmaintained for a while now and sees very little practical use. - Retaining it could lead to further :ref:`security considerations - `. - - .. _http-server-cli: Command-line interface @@ -563,24 +514,6 @@ The following options are accepted: .. versionadded:: 3.11 -.. option:: --cgi - - :class:`CGIHTTPRequestHandler` can be enabled in the command line by passing - the ``--cgi`` option:: - - python -m http.server --cgi - - .. deprecated-removed:: 3.13 3.15 - - :mod:`http.server` command line ``--cgi`` support is being removed - because :class:`CGIHTTPRequestHandler` is being removed. - -.. warning:: - - :class:`CGIHTTPRequestHandler` and the ``--cgi`` command-line option - are not intended for use by untrusted clients and may be vulnerable - to exploitation. Always use within a secure environment. - .. option:: --tls-cert Specifies a TLS certificate chain for HTTPS connections:: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e64eb19bddb522..023c279979d842 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1871,7 +1871,7 @@ New Deprecations * :mod:`http.server`: - * Deprecate :class:`~http.server.CGIHTTPRequestHandler`, + * Deprecate :class:`!CGIHTTPRequestHandler`, to be removed in Python 3.15. Process-based CGI HTTP servers have been out of favor for a very long time. This code was outdated, unmaintained, and rarely used. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 9e9a168db0e725..987cf944972329 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -121,6 +121,15 @@ Deprecated Removed ======= +http.server +----------- + +* Removed the :class:`!CGIHTTPRequestHandler` class + and the ``--cgi`` flag from the :program:`python -m http.server` + command-line interface. They were deprecated in Python 3.13. + (Contributed by Bénédikt Tran in :gh:`133810`.) + + platform -------- diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py index 439f8c02f4b586..a981326432429b 100644 --- a/Lib/_compat_pickle.py +++ b/Lib/_compat_pickle.py @@ -175,7 +175,6 @@ 'SimpleDialog': 'tkinter.simpledialog', 'DocXMLRPCServer': 'xmlrpc.server', 'SimpleHTTPServer': 'http.server', - 'CGIHTTPServer': 'http.server', # For compatibility with broken pickles saved in old Python 3 versions 'UserDict': 'collections', 'UserList': 'collections', @@ -217,8 +216,6 @@ ('DocXMLRPCServer', 'DocCGIXMLRPCRequestHandler'), ('http.server', 'SimpleHTTPRequestHandler'): ('SimpleHTTPServer', 'SimpleHTTPRequestHandler'), - ('http.server', 'CGIHTTPRequestHandler'): - ('CGIHTTPServer', 'CGIHTTPRequestHandler'), ('_socket', 'socket'): ('socket', '_socketobject'), }) diff --git a/Lib/http/client.py b/Lib/http/client.py index 33a858d34ae1ba..e7a1c7bc3b2ae1 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -181,11 +181,10 @@ def _strip_ipv6_iface(enc_name: bytes) -> bytes: return enc_name class HTTPMessage(email.message.Message): - # XXX The only usage of this method is in - # http.server.CGIHTTPRequestHandler. Maybe move the code there so - # that it doesn't need to be part of the public API. The API has - # never been defined so this could cause backwards compatibility - # issues. + + # The getallmatchingheaders() method was only used by the CGI handler + # that was removed in Python 3.15. However, since the public API was not + # properly defined, it will be kept for backwards compatibility reasons. def getallmatchingheaders(self, name): """Find all header lines matching a given header name. diff --git a/Lib/http/server.py b/Lib/http/server.py index 8be1903743a9a2..f42e9a375e479a 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -1,29 +1,10 @@ """HTTP server classes. Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see -SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, -and (deprecated) CGIHTTPRequestHandler for CGI scripts. +SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST. It does, however, optionally implement HTTP/1.1 persistent connections. -Notes on CGIHTTPRequestHandler ------------------------------- - -This class is deprecated. It implements GET and POST requests to cgi-bin scripts. - -If the os.fork() function is not present (Windows), subprocess.Popen() is used, -with slightly altered but never documented semantics. Use from a threaded -process is likely to trigger a warning at os.fork() time. - -In all cases, the implementation is intentionally naive -- all -requests are executed synchronously. - -SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL --- it may execute arbitrary Python code or external programs. - -Note that status code 200 is sent prior to execution of a CGI script, so -scripts cannot send other status codes such as 302 (redirect). - XXX To do: - log requests even later (to capture byte count) @@ -86,10 +67,8 @@ "HTTPServer", "ThreadingHTTPServer", "HTTPSServer", "ThreadingHTTPSServer", "BaseHTTPRequestHandler", "SimpleHTTPRequestHandler", - "CGIHTTPRequestHandler", ] -import copy import datetime import email.utils import html @@ -99,7 +78,6 @@ import mimetypes import os import posixpath -import select import shutil import socket import socketserver @@ -953,56 +931,6 @@ def guess_type(self, path): return 'application/octet-stream' -# Utilities for CGIHTTPRequestHandler - -def _url_collapse_path(path): - """ - Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references and returns a collapsed path. - - Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. - The utility of this function is limited to is_cgi method and helps - preventing some security attacks. - - Returns: The reconstituted URL, which will always start with a '/'. - - Raises: IndexError if too many '..' occur within the path. - - """ - # Query component should not be involved. - path, _, query = path.partition('?') - path = urllib.parse.unquote(path) - - # Similar to os.path.split(os.path.normpath(path)) but specific to URL - # path semantics rather than local operating system semantics. - path_parts = path.split('/') - head_parts = [] - for part in path_parts[:-1]: - if part == '..': - head_parts.pop() # IndexError if more '..' than prior parts - elif part and part != '.': - head_parts.append( part ) - if path_parts: - tail_part = path_parts.pop() - if tail_part: - if tail_part == '..': - head_parts.pop() - tail_part = '' - elif tail_part == '.': - tail_part = '' - else: - tail_part = '' - - if query: - tail_part = '?'.join((tail_part, query)) - - splitpath = ('/' + '/'.join(head_parts), tail_part) - collapsed_path = "/".join(splitpath) - - return collapsed_path - - - nobody = None def nobody_uid(): @@ -1026,274 +954,6 @@ def executable(path): return os.access(path, os.X_OK) -class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): - - """Complete HTTP server with GET, HEAD and POST commands. - - GET and HEAD also support running CGI scripts. - - The POST command is *only* implemented for CGI scripts. - - """ - - def __init__(self, *args, **kwargs): - import warnings - warnings._deprecated("http.server.CGIHTTPRequestHandler", - remove=(3, 15)) - super().__init__(*args, **kwargs) - - # Determine platform specifics - have_fork = hasattr(os, 'fork') - - # Make rfile unbuffered -- we need to read one line and then pass - # the rest to a subprocess, so we can't use buffered input. - rbufsize = 0 - - def do_POST(self): - """Serve a POST request. - - This is only implemented for CGI scripts. - - """ - - if self.is_cgi(): - self.run_cgi() - else: - self.send_error( - HTTPStatus.NOT_IMPLEMENTED, - "Can only POST to CGI scripts") - - def send_head(self): - """Version of send_head that support CGI scripts""" - if self.is_cgi(): - return self.run_cgi() - else: - return SimpleHTTPRequestHandler.send_head(self) - - def is_cgi(self): - """Test whether self.path corresponds to a CGI script. - - Returns True and updates the cgi_info attribute to the tuple - (dir, rest) if self.path requires running a CGI script. - Returns False otherwise. - - If any exception is raised, the caller should assume that - self.path was rejected as invalid and act accordingly. - - The default implementation tests whether the normalized url - path begins with one of the strings in self.cgi_directories - (and the next character is a '/' or the end of the string). - - """ - collapsed_path = _url_collapse_path(self.path) - dir_sep = collapsed_path.find('/', 1) - while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories: - dir_sep = collapsed_path.find('/', dir_sep+1) - if dir_sep > 0: - head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] - self.cgi_info = head, tail - return True - return False - - - cgi_directories = ['/cgi-bin', '/htbin'] - - def is_executable(self, path): - """Test whether argument path is an executable file.""" - return executable(path) - - def is_python(self, path): - """Test whether argument path is a Python script.""" - head, tail = os.path.splitext(path) - return tail.lower() in (".py", ".pyw") - - def run_cgi(self): - """Execute a CGI script.""" - dir, rest = self.cgi_info - path = dir + '/' + rest - i = path.find('/', len(dir)+1) - while i >= 0: - nextdir = path[:i] - nextrest = path[i+1:] - - scriptdir = self.translate_path(nextdir) - if os.path.isdir(scriptdir): - dir, rest = nextdir, nextrest - i = path.find('/', len(dir)+1) - else: - break - - # find an explicit query string, if present. - rest, _, query = rest.partition('?') - - # dissect the part after the directory name into a script name & - # a possible additional path, to be stored in PATH_INFO. - i = rest.find('/') - if i >= 0: - script, rest = rest[:i], rest[i:] - else: - script, rest = rest, '' - - scriptname = dir + '/' + script - scriptfile = self.translate_path(scriptname) - if not os.path.exists(scriptfile): - self.send_error( - HTTPStatus.NOT_FOUND, - "No such CGI script (%r)" % scriptname) - return - if not os.path.isfile(scriptfile): - self.send_error( - HTTPStatus.FORBIDDEN, - "CGI script is not a plain file (%r)" % scriptname) - return - ispy = self.is_python(scriptname) - if self.have_fork or not ispy: - if not self.is_executable(scriptfile): - self.send_error( - HTTPStatus.FORBIDDEN, - "CGI script is not executable (%r)" % scriptname) - return - - # Reference: https://www6.uniovi.es/~antonio/ncsa_httpd/cgi/env.html - # XXX Much of the following could be prepared ahead of time! - env = copy.deepcopy(os.environ) - env['SERVER_SOFTWARE'] = self.version_string() - env['SERVER_NAME'] = self.server.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PROTOCOL'] = self.protocol_version - env['SERVER_PORT'] = str(self.server.server_port) - env['REQUEST_METHOD'] = self.command - uqrest = urllib.parse.unquote(rest) - env['PATH_INFO'] = uqrest - env['PATH_TRANSLATED'] = self.translate_path(uqrest) - env['SCRIPT_NAME'] = scriptname - env['QUERY_STRING'] = query - env['REMOTE_ADDR'] = self.client_address[0] - authorization = self.headers.get("authorization") - if authorization: - authorization = authorization.split() - if len(authorization) == 2: - import base64, binascii - env['AUTH_TYPE'] = authorization[0] - if authorization[0].lower() == "basic": - try: - authorization = authorization[1].encode('ascii') - authorization = base64.decodebytes(authorization).\ - decode('ascii') - except (binascii.Error, UnicodeError): - pass - else: - authorization = authorization.split(':') - if len(authorization) == 2: - env['REMOTE_USER'] = authorization[0] - # XXX REMOTE_IDENT - if self.headers.get('content-type') is None: - env['CONTENT_TYPE'] = self.headers.get_content_type() - else: - env['CONTENT_TYPE'] = self.headers['content-type'] - length = self.headers.get('content-length') - if length: - env['CONTENT_LENGTH'] = length - referer = self.headers.get('referer') - if referer: - env['HTTP_REFERER'] = referer - accept = self.headers.get_all('accept', ()) - env['HTTP_ACCEPT'] = ','.join(accept) - ua = self.headers.get('user-agent') - if ua: - env['HTTP_USER_AGENT'] = ua - co = filter(None, self.headers.get_all('cookie', [])) - cookie_str = ', '.join(co) - if cookie_str: - env['HTTP_COOKIE'] = cookie_str - # XXX Other HTTP_* headers - # Since we're setting the env in the parent, provide empty - # values to override previously set values - for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', - 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): - env.setdefault(k, "") - - self.send_response(HTTPStatus.OK, "Script output follows") - self.flush_headers() - - decoded_query = query.replace('+', ' ') - - if self.have_fork: - # Unix -- fork as we should - args = [script] - if '=' not in decoded_query: - args.append(decoded_query) - nobody = nobody_uid() - self.wfile.flush() # Always flush before forking - pid = os.fork() - if pid != 0: - # Parent - pid, sts = os.waitpid(pid, 0) - # throw away additional data [see bug #427345] - while select.select([self.rfile], [], [], 0)[0]: - if not self.rfile.read(1): - break - exitcode = os.waitstatus_to_exitcode(sts) - if exitcode: - self.log_error(f"CGI script exit code {exitcode}") - return - # Child - try: - try: - os.setuid(nobody) - except OSError: - pass - os.dup2(self.rfile.fileno(), 0) - os.dup2(self.wfile.fileno(), 1) - os.execve(scriptfile, args, env) - except: - self.server.handle_error(self.request, self.client_address) - os._exit(127) - - else: - # Non-Unix -- use subprocess - import subprocess - cmdline = [scriptfile] - if self.is_python(scriptfile): - interp = sys.executable - if interp.lower().endswith("w.exe"): - # On Windows, use python.exe, not pythonw.exe - interp = interp[:-5] + interp[-4:] - cmdline = [interp, '-u'] + cmdline - if '=' not in query: - cmdline.append(query) - self.log_message("command: %s", subprocess.list2cmdline(cmdline)) - try: - nbytes = int(length) - except (TypeError, ValueError): - nbytes = 0 - p = subprocess.Popen(cmdline, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env = env - ) - if self.command.lower() == "post" and nbytes > 0: - data = self.rfile.read(nbytes) - else: - data = None - # throw away additional data [see bug #427345] - while select.select([self.rfile._sock], [], [], 0)[0]: - if not self.rfile._sock.recv(1): - break - stdout, stderr = p.communicate(data) - self.wfile.write(stdout) - if stderr: - self.log_error('%s', stderr) - p.stderr.close() - p.stdout.close() - status = p.returncode - if status: - self.log_error("CGI script exit status %#x", status) - else: - self.log_message("CGI script exited OK") - - def _get_best_family(*address): infos = socket.getaddrinfo( *address, @@ -1336,13 +996,12 @@ def test(HandlerClass=BaseHTTPRequestHandler, print("\nKeyboard interrupt received, exiting.") sys.exit(0) + if __name__ == '__main__': import argparse import contextlib parser = argparse.ArgumentParser(color=True) - parser.add_argument('--cgi', action='store_true', - help='run as CGI server') parser.add_argument('-b', '--bind', metavar='ADDRESS', help='bind to this address ' '(default: all interfaces)') @@ -1378,11 +1037,6 @@ def test(HandlerClass=BaseHTTPRequestHandler, except OSError as e: parser.error(f"Failed to read TLS password file: {e}") - if args.cgi: - handler_class = CGIHTTPRequestHandler - else: - handler_class = SimpleHTTPRequestHandler - # ensure dual-stack is not disabled; ref #38907 class DualStackServer(ThreadingHTTPServer): @@ -1398,7 +1052,7 @@ def finish_request(self, request, client_address): directory=args.directory) test( - HandlerClass=handler_class, + HandlerClass=SimpleHTTPRequestHandler, ServerClass=DualStackServer, port=args.port, bind=args.bind, diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 557e698aa3481c..11c74a02bf2903 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -3,16 +3,15 @@ Written by Cody A.W. Somerville , Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest. """ -from collections import OrderedDict + from http.server import BaseHTTPRequestHandler, HTTPServer, HTTPSServer, \ - SimpleHTTPRequestHandler, CGIHTTPRequestHandler + SimpleHTTPRequestHandler from http import server, HTTPStatus import os import socket import sys import re -import base64 import ntpath import pathlib import shutil @@ -31,7 +30,7 @@ import unittest from test import support from test.support import ( - is_apple, import_helper, os_helper, requires_subprocess, threading_helper + is_apple, import_helper, os_helper, threading_helper ) try: @@ -820,329 +819,6 @@ def test_path_without_leading_slash(self): self.tempdir_name + "/?hi=1") -cgi_file1 = """\ -#!%s - -print("Content-type: text/html") -print() -print("Hello World") -""" - -cgi_file2 = """\ -#!%s -import os -import sys -import urllib.parse - -print("Content-type: text/html") -print() - -content_length = int(os.environ["CONTENT_LENGTH"]) -query_string = sys.stdin.buffer.read(content_length) -params = {key.decode("utf-8"): val.decode("utf-8") - for key, val in urllib.parse.parse_qsl(query_string)} - -print("%%s, %%s, %%s" %% (params["spam"], params["eggs"], params["bacon"])) -""" - -cgi_file4 = """\ -#!%s -import os - -print("Content-type: text/html") -print() - -print(os.environ["%s"]) -""" - -cgi_file6 = """\ -#!%s -import os - -print("X-ambv: was here") -print("Content-type: text/html") -print() -print("
")
-for k, v in os.environ.items():
-    try:
-        k.encode('ascii')
-        v.encode('ascii')
-    except UnicodeEncodeError:
-        continue  # see: BPO-44647
-    print(f"{k}={v}")
-print("
") -""" - - -@unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0, - "This test can't be run reliably as root (issue #13308).") -@requires_subprocess() -class CGIHTTPServerTestCase(BaseTestCase): - class request_handler(NoLogRequestHandler, CGIHTTPRequestHandler): - _test_case_self = None # populated by each setUp() method call. - - def __init__(self, *args, **kwargs): - with self._test_case_self.assertWarnsRegex( - DeprecationWarning, - r'http\.server\.CGIHTTPRequestHandler'): - # This context also happens to catch and silence the - # threading DeprecationWarning from os.fork(). - super().__init__(*args, **kwargs) - - linesep = os.linesep.encode('ascii') - - def setUp(self): - self.request_handler._test_case_self = self # practical, but yuck. - BaseTestCase.setUp(self) - self.cwd = os.getcwd() - self.parent_dir = tempfile.mkdtemp() - self.cgi_dir = os.path.join(self.parent_dir, 'cgi-bin') - self.cgi_child_dir = os.path.join(self.cgi_dir, 'child-dir') - self.sub_dir_1 = os.path.join(self.parent_dir, 'sub') - self.sub_dir_2 = os.path.join(self.sub_dir_1, 'dir') - self.cgi_dir_in_sub_dir = os.path.join(self.sub_dir_2, 'cgi-bin') - os.mkdir(self.cgi_dir) - os.mkdir(self.cgi_child_dir) - os.mkdir(self.sub_dir_1) - os.mkdir(self.sub_dir_2) - os.mkdir(self.cgi_dir_in_sub_dir) - self.nocgi_path = None - self.file1_path = None - self.file2_path = None - self.file3_path = None - self.file4_path = None - self.file5_path = None - - # The shebang line should be pure ASCII: use symlink if possible. - # See issue #7668. - self._pythonexe_symlink = None - if os_helper.can_symlink(): - self.pythonexe = os.path.join(self.parent_dir, 'python') - self._pythonexe_symlink = support.PythonSymlink(self.pythonexe).__enter__() - else: - self.pythonexe = sys.executable - - try: - # The python executable path is written as the first line of the - # CGI Python script. The encoding cookie cannot be used, and so the - # path should be encodable to the default script encoding (utf-8) - self.pythonexe.encode('utf-8') - except UnicodeEncodeError: - self.tearDown() - self.skipTest("Python executable path is not encodable to utf-8") - - self.nocgi_path = os.path.join(self.parent_dir, 'nocgi.py') - with open(self.nocgi_path, 'w', encoding='utf-8') as fp: - fp.write(cgi_file1 % self.pythonexe) - os.chmod(self.nocgi_path, 0o777) - - self.file1_path = os.path.join(self.cgi_dir, 'file1.py') - with open(self.file1_path, 'w', encoding='utf-8') as file1: - file1.write(cgi_file1 % self.pythonexe) - os.chmod(self.file1_path, 0o777) - - self.file2_path = os.path.join(self.cgi_dir, 'file2.py') - with open(self.file2_path, 'w', encoding='utf-8') as file2: - file2.write(cgi_file2 % self.pythonexe) - os.chmod(self.file2_path, 0o777) - - self.file3_path = os.path.join(self.cgi_child_dir, 'file3.py') - with open(self.file3_path, 'w', encoding='utf-8') as file3: - file3.write(cgi_file1 % self.pythonexe) - os.chmod(self.file3_path, 0o777) - - self.file4_path = os.path.join(self.cgi_dir, 'file4.py') - with open(self.file4_path, 'w', encoding='utf-8') as file4: - file4.write(cgi_file4 % (self.pythonexe, 'QUERY_STRING')) - os.chmod(self.file4_path, 0o777) - - self.file5_path = os.path.join(self.cgi_dir_in_sub_dir, 'file5.py') - with open(self.file5_path, 'w', encoding='utf-8') as file5: - file5.write(cgi_file1 % self.pythonexe) - os.chmod(self.file5_path, 0o777) - - self.file6_path = os.path.join(self.cgi_dir, 'file6.py') - with open(self.file6_path, 'w', encoding='utf-8') as file6: - file6.write(cgi_file6 % self.pythonexe) - os.chmod(self.file6_path, 0o777) - - os.chdir(self.parent_dir) - - def tearDown(self): - self.request_handler._test_case_self = None - try: - os.chdir(self.cwd) - if self._pythonexe_symlink: - self._pythonexe_symlink.__exit__(None, None, None) - if self.nocgi_path: - os.remove(self.nocgi_path) - if self.file1_path: - os.remove(self.file1_path) - if self.file2_path: - os.remove(self.file2_path) - if self.file3_path: - os.remove(self.file3_path) - if self.file4_path: - os.remove(self.file4_path) - if self.file5_path: - os.remove(self.file5_path) - if self.file6_path: - os.remove(self.file6_path) - os.rmdir(self.cgi_child_dir) - os.rmdir(self.cgi_dir) - os.rmdir(self.cgi_dir_in_sub_dir) - os.rmdir(self.sub_dir_2) - os.rmdir(self.sub_dir_1) - # The 'gmon.out' file can be written in the current working - # directory if C-level code profiling with gprof is enabled. - os_helper.unlink(os.path.join(self.parent_dir, 'gmon.out')) - os.rmdir(self.parent_dir) - finally: - BaseTestCase.tearDown(self) - - def test_url_collapse_path(self): - # verify tail is the last portion and head is the rest on proper urls - test_vectors = { - '': '//', - '..': IndexError, - '/.//..': IndexError, - '/': '//', - '//': '//', - '/\\': '//\\', - '/.//': '//', - 'cgi-bin/file1.py': '/cgi-bin/file1.py', - '/cgi-bin/file1.py': '/cgi-bin/file1.py', - 'a': '//a', - '/a': '//a', - '//a': '//a', - './a': '//a', - './C:/': '/C:/', - '/a/b': '/a/b', - '/a/b/': '/a/b/', - '/a/b/.': '/a/b/', - '/a/b/c/..': '/a/b/', - '/a/b/c/../d': '/a/b/d', - '/a/b/c/../d/e/../f': '/a/b/d/f', - '/a/b/c/../d/e/../../f': '/a/b/f', - '/a/b/c/../d/e/.././././..//f': '/a/b/f', - '../a/b/c/../d/e/.././././..//f': IndexError, - '/a/b/c/../d/e/../../../f': '/a/f', - '/a/b/c/../d/e/../../../../f': '//f', - '/a/b/c/../d/e/../../../../../f': IndexError, - '/a/b/c/../d/e/../../../../f/..': '//', - '/a/b/c/../d/e/../../../../f/../.': '//', - } - for path, expected in test_vectors.items(): - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, - server._url_collapse_path, path) - else: - actual = server._url_collapse_path(path) - self.assertEqual(expected, actual, - msg='path = %r\nGot: %r\nWanted: %r' % - (path, actual, expected)) - - def test_headers_and_content(self): - res = self.request('/cgi-bin/file1.py') - self.assertEqual( - (res.read(), res.getheader('Content-type'), res.status), - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK)) - - def test_issue19435(self): - res = self.request('///////////nocgi.py/../cgi-bin/nothere.sh') - self.assertEqual(res.status, HTTPStatus.NOT_FOUND) - - def test_post(self): - params = urllib.parse.urlencode( - {'spam' : 1, 'eggs' : 'python', 'bacon' : 123456}) - headers = {'Content-type' : 'application/x-www-form-urlencoded'} - res = self.request('/cgi-bin/file2.py', 'POST', params, headers) - - self.assertEqual(res.read(), b'1, python, 123456' + self.linesep) - - def test_invaliduri(self): - res = self.request('/cgi-bin/invalid') - res.read() - self.assertEqual(res.status, HTTPStatus.NOT_FOUND) - - def test_authorization(self): - headers = {b'Authorization' : b'Basic ' + - base64.b64encode(b'username:pass')} - res = self.request('/cgi-bin/file1.py', 'GET', headers=headers) - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_no_leading_slash(self): - # http://bugs.python.org/issue2254 - res = self.request('cgi-bin/file1.py') - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_os_environ_is_not_altered(self): - signature = "Test CGI Server" - os.environ['SERVER_SOFTWARE'] = signature - res = self.request('/cgi-bin/file1.py') - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - self.assertEqual(os.environ['SERVER_SOFTWARE'], signature) - - def test_urlquote_decoding_in_cgi_check(self): - res = self.request('/cgi-bin%2ffile1.py') - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_nested_cgi_path_issue21323(self): - res = self.request('/cgi-bin/child-dir/file3.py') - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_query_with_multiple_question_mark(self): - res = self.request('/cgi-bin/file4.py?a=b?c=d') - self.assertEqual( - (b'a=b?c=d' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_query_with_continuous_slashes(self): - res = self.request('/cgi-bin/file4.py?k=aa%2F%2Fbb&//q//p//=//a//b//') - self.assertEqual( - (b'k=aa%2F%2Fbb&//q//p//=//a//b//' + self.linesep, - 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - - def test_cgi_path_in_sub_directories(self): - try: - CGIHTTPRequestHandler.cgi_directories.append('/sub/dir/cgi-bin') - res = self.request('/sub/dir/cgi-bin/file5.py') - self.assertEqual( - (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK), - (res.read(), res.getheader('Content-type'), res.status)) - finally: - CGIHTTPRequestHandler.cgi_directories.remove('/sub/dir/cgi-bin') - - def test_accept(self): - browser_accept = \ - 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - tests = ( - ((('Accept', browser_accept),), browser_accept), - ((), ''), - # Hack case to get two values for the one header - ((('Accept', 'text/html'), ('ACCEPT', 'text/plain')), - 'text/html,text/plain'), - ) - for headers, expected in tests: - headers = OrderedDict(headers) - with self.subTest(headers): - res = self.request('/cgi-bin/file6.py', 'GET', headers=headers) - self.assertEqual(http.HTTPStatus.OK, res.status) - expected = f"HTTP_ACCEPT={expected}".encode('ascii') - self.assertIn(expected, res.read()) - - class SocketlessRequestHandler(SimpleHTTPRequestHandler): def __init__(self, directory=None): request = mock.Mock() @@ -1162,6 +838,7 @@ def do_GET(self): def log_message(self, format, *args): pass + class RejectingSocketlessRequestHandler(SocketlessRequestHandler): def handle_expect_100(self): self.send_error(HTTPStatus.EXPECTATION_FAILED) diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index cafe872c7aae9b..9353fb678625b3 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -69,7 +69,8 @@ def read_environ(): # Python 3's http.server.CGIHTTPRequestHandler decodes # using the urllib.unquote default of UTF-8, amongst other - # issues. + # issues. While the CGI handler is removed in 3.15, this + # is kept for legacy reasons. elif ( software.startswith('simplehttp/') and 'python/3' in software diff --git a/Misc/NEWS.d/3.13.0a1.rst b/Misc/NEWS.d/3.13.0a1.rst index 304baf6ac8eea9..0a93cbcea0ffd2 100644 --- a/Misc/NEWS.d/3.13.0a1.rst +++ b/Misc/NEWS.d/3.13.0a1.rst @@ -2294,7 +2294,7 @@ superclass. Patch by James Hilton-Balfe .. nonce: VksX1D .. section: Library -:class:`http.server.CGIHTTPRequestHandler` has been deprecated for removal +:class:`!http.server.CGIHTTPRequestHandler` has been deprecated for removal in 3.15. Its design is old and the web world has long since moved beyond CGI. diff --git a/Misc/NEWS.d/next/Library/2025-05-10-11-04-47.gh-issue-133810.03WhnK.rst b/Misc/NEWS.d/next/Library/2025-05-10-11-04-47.gh-issue-133810.03WhnK.rst new file mode 100644 index 00000000000000..4073974e364a1c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-10-11-04-47.gh-issue-133810.03WhnK.rst @@ -0,0 +1,3 @@ +Remove :class:`!http.server.CGIHTTPRequestHandler` and ``--cgi`` flag from the +:program:`python -m http.server` command-line interface. They were +deprecated in Python 3.13. Patch by Bénédikt Tran. From c1c9ad1d5a62a591eb2f0f0d29f3fa02e0949f14 Mon Sep 17 00:00:00 2001 From: Oleg Burnaev <51371645+Shepard2154@users.noreply.github.com> Date: Sat, 17 May 2025 10:59:37 +0300 Subject: [PATCH 171/989] gh-133881: add forward reference to `list.sort()` in lambda expression tutorial (#133910) --- Doc/tutorial/controlflow.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 95939242fb7d44..9a810f16388b9d 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -999,7 +999,8 @@ scope:: 43 The above example uses a lambda expression to return a function. Another use -is to pass a small function as an argument:: +is to pass a small function as an argument. For instance, :meth:`list.sort` +takes a sorting key function *key* which can be a lambda function:: >>> pairs = [(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')] >>> pairs.sort(key=lambda pair: pair[1]) From 9d73875072e20bf3cc87cadc553d6a6fb4f71df5 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Sat, 17 May 2025 10:00:13 +0200 Subject: [PATCH 172/989] gh-113878: fix `versionadded` in `dataclasses.field()` documentation (#134065) --- Doc/library/dataclasses.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 72612211b43d5e..f18c7cc9c02da6 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -304,9 +304,9 @@ Module contents .. versionadded:: 3.10 - - ``doc``: optional docstring for this field. + - *doc*: optional docstring for this field. - .. versionadded:: 3.13 + .. versionadded:: 3.14 If the default value of a field is specified by a call to :func:`!field`, then the class attribute for this field will be From af6b3b825f3b653ffdb29fc1dd36de8acfe0a641 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Sat, 17 May 2025 04:58:41 -0400 Subject: [PATCH 173/989] Docs: C API: Improve documentation around non-Python threads with subinterpreters (GH-131087) Co-authored-by: Victor Stinner --- Doc/c-api/init.rst | 59 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 90767b12471bb6..9c866438b487d0 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -919,8 +919,36 @@ Note that the ``PyGILState_*`` functions assume there is only one global interpreter (created automatically by :c:func:`Py_Initialize`). Python supports the creation of additional interpreters (using :c:func:`Py_NewInterpreter`), but mixing multiple interpreters and the -``PyGILState_*`` API is unsupported. +``PyGILState_*`` API is unsupported. This is because :c:func:`PyGILState_Ensure` +and similar functions default to :term:`attaching ` a +:term:`thread state` for the main interpreter, meaning that the thread can't safely +interact with the calling subinterpreter. + +Supporting subinterpreters in non-Python threads +------------------------------------------------ + +If you would like to support subinterpreters with non-Python created threads, you +must use the ``PyThreadState_*`` API instead of the traditional ``PyGILState_*`` +API. + +In particular, you must store the interpreter state from the calling +function and pass it to :c:func:`PyThreadState_New`, which will ensure that +the :term:`thread state` is targeting the correct interpreter:: + + /* The return value of PyInterpreterState_Get() from the + function that created this thread. */ + PyInterpreterState *interp = ThreadData->interp; + PyThreadState *tstate = PyThreadState_New(interp); + PyThreadState_Swap(tstate); + + /* GIL of the subinterpreter is now held. + Perform Python actions here. */ + result = CallSomeFunction(); + /* evaluate result or handle exception */ + /* Destroy the thread state. No Python API allowed beyond this point. */ + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); .. _fork-and-threads: @@ -1097,6 +1125,10 @@ code, or when embedding the Python interpreter: .. seealso: :c:func:`PyEval_ReleaseThread` + .. note:: + Similar to :c:func:`PyGILState_Ensure`, this function will hang the + thread if the runtime is finalizing. + The following functions use thread-local storage, and are not compatible with sub-interpreters: @@ -1123,10 +1155,10 @@ with sub-interpreters: When the function returns, there will be an :term:`attached thread state` and the thread will be able to call arbitrary Python code. Failure is a fatal error. - .. note:: - Calling this function from a thread when the runtime is finalizing will - hang the thread until the program exits, even if the thread was not - created by Python. Refer to + .. warning:: + Calling this function when the runtime is finalizing is unsafe. Doing + so will either hang the thread until the program ends, or fully crash + the interpreter in rare cases. Refer to :ref:`cautions-regarding-runtime-finalization` for more details. .. versionchanged:: 3.14 @@ -1143,7 +1175,6 @@ with sub-interpreters: Every call to :c:func:`PyGILState_Ensure` must be matched by a call to :c:func:`PyGILState_Release` on the same thread. - .. c:function:: PyThreadState* PyGILState_GetThisThreadState() Get the :term:`attached thread state` for this thread. May return ``NULL`` if no @@ -1151,20 +1182,30 @@ with sub-interpreters: always has such a thread-state, even if no auto-thread-state call has been made on the main thread. This is mainly a helper/diagnostic function. - .. seealso: :c:func:`PyThreadState_Get`` + .. note:: + This function does not account for :term:`thread states ` created + by something other than :c:func:`PyGILState_Ensure` (such as :c:func:`PyThreadState_New`). + Prefer :c:func:`PyThreadState_Get` or :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. seealso: :c:func:`PyThreadState_Get`` .. c:function:: int PyGILState_Check() Return ``1`` if the current thread is holding the :term:`GIL` and ``0`` otherwise. This function can be called from any thread at any time. - Only if it has had its Python thread state initialized and currently is - holding the :term:`GIL` will it return ``1``. + Only if it has had its :term:`thread state ` initialized + via :c:func:`PyGILState_Ensure` will it return ``1``. This is mainly a helper/diagnostic function. It can be useful for example in callback contexts or memory allocation functions when knowing that the :term:`GIL` is locked can allow the caller to perform sensitive actions or otherwise behave differently. + .. note:: + If the current Python process has ever created a subinterpreter, this + function will *always* return ``1``. Prefer :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. versionadded:: 3.4 From b41d79c776af4bfa5d28fc2036c9137978294b6a Mon Sep 17 00:00:00 2001 From: Clifford Gama <53076065+cliff688@users.noreply.github.com> Date: Sat, 17 May 2025 13:36:38 +0200 Subject: [PATCH 174/989] Docs: fix spelling of "test case" in `unittest` documentation (#134137) --- Doc/library/unittest.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 61022fe052ca80..dcdda1719bf593 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -109,7 +109,7 @@ Here is a short script to test three string methods:: unittest.main() -A testcase is created by subclassing :class:`unittest.TestCase`. The three +A test case is created by subclassing :class:`unittest.TestCase`. The three individual tests are defined with methods whose names start with the letters ``test``. This naming convention informs the test runner about which methods represent tests. From 7a9d46295a497669eaa6e647c33ab71c8cf620a1 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sat, 17 May 2025 14:11:19 +0100 Subject: [PATCH 175/989] gh-88275: Add missing `__init__` method to `match` example (#120281) --- Doc/whatsnew/3.10.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index f5e38950756afe..1067601c652300 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -551,11 +551,12 @@ Patterns and classes If you are using classes to structure your data, you can use as a pattern the class name followed by an argument list resembling a constructor. This -pattern has the ability to capture class attributes into variables:: +pattern has the ability to capture instance attributes into variables:: class Point: - x: int - y: int + def __init__(self, x, y): + self.x = x + self.y = y def location(point): match point: From 84914ad0e5f96f0ca7238f3b4bc7fc4e50b1abb3 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 17 May 2025 17:57:02 +0300 Subject: [PATCH 176/989] gh-133999: Fix `except` parsing regression in 3.14 (#134035) --- Grammar/python.gram | 4 +-- Lib/test/test_syntax.py | 17 ++++++++++ ...-05-15-11-38-16.gh-issue-133999.uBZ8uS.rst | 2 ++ Parser/parser.c | 32 +++++++++++++------ 4 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-15-11-38-16.gh-issue-133999.uBZ8uS.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index 7f0ee11ba7e2e7..de435537095031 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -1418,7 +1418,7 @@ invalid_except_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } - | 'except' expression 'as' a=expression { + | 'except' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except statement with %s", _PyPegen_get_expr_name(a)) } invalid_except_star_stmt: @@ -1426,7 +1426,7 @@ invalid_except_star_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' '*' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' '*' (NEWLINE | ':') { RAISE_SYNTAX_ERROR("expected one or more exception types") } - | 'except' '*' expression 'as' a=expression { + | 'except' '*' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except* statement with %s", _PyPegen_get_expr_name(a)) } invalid_finally_stmt: diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 0eccf03a1a96e3..c7ac791415830f 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1431,6 +1431,23 @@ Traceback (most recent call last): SyntaxError: cannot use except* statement with literal +Regression tests for gh-133999: + + >>> try: pass + ... except TypeError as name: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + + >>> try: pass + ... except* TypeError as name: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + + >>> match 1: + ... case 1 | 2 as abc: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + Ensure that early = are not matched by the parser as invalid comparisons >>> f(2, 4, x=34); 1 $ 2 Traceback (most recent call last): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-15-11-38-16.gh-issue-133999.uBZ8uS.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-15-11-38-16.gh-issue-133999.uBZ8uS.rst new file mode 100644 index 00000000000000..7d9c49688c3ce2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-15-11-38-16.gh-issue-133999.uBZ8uS.rst @@ -0,0 +1,2 @@ +Fix :exc:`SyntaxError` regression in :keyword:`except` parsing after +:gh:`123440`. diff --git a/Parser/parser.c b/Parser/parser.c index b48b1b20ee4d8e..84a293cddffde6 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -24202,7 +24202,7 @@ invalid_try_stmt_rule(Parser *p) // | 'except' expression ',' expressions 'as' NAME ':' // | 'except' expression ['as' NAME] NEWLINE // | 'except' NEWLINE -// | 'except' expression 'as' expression +// | 'except' expression 'as' expression ':' block static void * invalid_except_stmt_rule(Parser *p) { @@ -24318,15 +24318,17 @@ invalid_except_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' NEWLINE")); } - { // 'except' expression 'as' expression + { // 'except' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; + Token * _literal; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24336,9 +24338,13 @@ invalid_except_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24349,7 +24355,7 @@ invalid_except_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression ':' block")); } _res = NULL; done: @@ -24361,7 +24367,7 @@ invalid_except_stmt_rule(Parser *p) // | 'except' '*' expression ',' expressions 'as' NAME ':' // | 'except' '*' expression ['as' NAME] NEWLINE // | 'except' '*' (NEWLINE | ':') -// | 'except' '*' expression 'as' expression +// | 'except' '*' expression 'as' expression ':' block static void * invalid_except_star_stmt_rule(Parser *p) { @@ -24486,16 +24492,18 @@ invalid_except_star_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' (NEWLINE | ':')")); } - { // 'except' '*' expression 'as' expression + { // 'except' '*' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; Token * _literal; + Token * _literal_1; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24507,9 +24515,13 @@ invalid_except_star_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except* statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24520,7 +24532,7 @@ invalid_except_star_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); } _res = NULL; done: From fc7f4c36664314393bd4c30355e21bd7aeac524d Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sat, 17 May 2025 12:23:19 -0700 Subject: [PATCH 177/989] gh-134119: Fix crash from calling next() on exhausted template iterator (#134120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_string/test_templatelib.py | 7 +++++++ .../2025-05-16-20-59-12.gh-issue-134119.w8expI.rst | 2 ++ Objects/templateobject.c | 3 +++ 3 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-20-59-12.gh-issue-134119.w8expI.rst diff --git a/Lib/test/test_string/test_templatelib.py b/Lib/test/test_string/test_templatelib.py index 5b9490c2be6de0..85fcff486d6616 100644 --- a/Lib/test/test_string/test_templatelib.py +++ b/Lib/test/test_string/test_templatelib.py @@ -148,6 +148,13 @@ def test_iter(self): self.assertEqual(res[1].format_spec, '') self.assertEqual(res[2], ' yz') + def test_exhausted(self): + # See https://github.com/python/cpython/issues/134119. + template_iter = iter(t"{1}") + self.assertIsInstance(next(template_iter), Interpolation) + self.assertRaises(StopIteration, next, template_iter) + self.assertRaises(StopIteration, next, template_iter) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-20-59-12.gh-issue-134119.w8expI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-20-59-12.gh-issue-134119.w8expI.rst new file mode 100644 index 00000000000000..754e816628563e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-20-59-12.gh-issue-134119.w8expI.rst @@ -0,0 +1,2 @@ +Fix crash when calling :func:`next` on an exhausted template string iterator. +Patch by Jelle Zijlstra. diff --git a/Objects/templateobject.c b/Objects/templateobject.c index 06cb19e0b6d056..4293a311c440f7 100644 --- a/Objects/templateobject.c +++ b/Objects/templateobject.c @@ -23,6 +23,9 @@ templateiter_next(PyObject *op) if (self->from_strings) { item = PyIter_Next(self->stringsiter); self->from_strings = 0; + if (item == NULL) { + return NULL; + } if (PyUnicode_GET_LENGTH(item) == 0) { Py_SETREF(item, PyIter_Next(self->interpolationsiter)); self->from_strings = 1; From 009e7b36981fd07f7cca1fdcfcf172ce1584fac7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 17 May 2025 18:24:40 -0400 Subject: [PATCH 178/989] gh-134064: Fix sys.remote_exec() error checking (#134067) --- Lib/test/test_sys.py | 7 +++++++ Python/sysmodule.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8af2e3488b48d9..fb1c8492a64d38 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -2176,6 +2176,13 @@ def test_remote_exec_invalid_pid(self): with self.assertRaises(OSError): sys.remote_exec(99999, "print('should not run')") + def test_remote_exec_invalid_script(self): + """Test remote exec with invalid script type""" + with self.assertRaises(TypeError): + sys.remote_exec(0, None) + with self.assertRaises(TypeError): + sys.remote_exec(0, 123) + def test_remote_exec_syntax_error(self): """Test remote exec with syntax error in script""" script = ''' diff --git a/Python/sysmodule.c b/Python/sysmodule.c index dd048205757eb1..4ed045e3297bbc 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2485,7 +2485,7 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) PyObject *path; const char *debugger_script_path; - if (PyUnicode_FSConverter(script, &path) < 0) { + if (PyUnicode_FSConverter(script, &path) == 0) { return NULL; } debugger_script_path = PyBytes_AS_STRING(path); From fa4e088668d4a41f9be5babe7edd5409290ee92a Mon Sep 17 00:00:00 2001 From: Micha Albert Date: Sun, 18 May 2025 01:47:37 +0000 Subject: [PATCH 179/989] gh-134150: Clarify distinction between JSON and Python objects (#134154) * gh-134150: Clarify distinction between JSON objects and Python objects in json module docs * Revert change to JSON introduction * Clarify occurrences of "object literal" as JSON --- Doc/library/json.rst | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 26579ec6328860..12a5a96a3c56f3 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,12 +18,17 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. note:: + The term "object" in the context of JSON processing in Python can be + ambiguous. All values in Python are objects. In JSON, an object refers to + any data wrapped in curly braces, similar to a Python dictionary. + .. warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory resources. Limiting the size of data to be parsed is recommended. -:mod:`json` exposes an API familiar to users of the standard library +This module exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. Encoding basic Python object hierarchies:: @@ -60,7 +65,7 @@ Pretty printing:: "6": 7 } -Specializing JSON object encoding:: +Customizing JSON object encoding:: >>> import json >>> def custom_json(obj): @@ -83,7 +88,7 @@ Decoding JSON:: >>> json.load(io) ['streaming API'] -Specializing JSON object decoding:: +Customizing JSON object decoding:: >>> import json >>> def as_complex(dct): @@ -279,7 +284,7 @@ Basic Usage :param object_hook: If set, a function that is called with the result of - any object literal decoded (a :class:`dict`). + any JSON object literal decoded (a :class:`dict`). The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders, @@ -289,7 +294,7 @@ Basic Usage :param object_pairs_hook: If set, a function that is called with the result of - any object literal decoded with an ordered list of pairs. + any JSON object literal decoded with an ordered list of pairs. The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders. From 4e9005d32ff466925f40af410f2ea6bf2329bcf8 Mon Sep 17 00:00:00 2001 From: Nico-Posada <102486290+Nico-Posada@users.noreply.github.com> Date: Sun, 18 May 2025 03:11:38 -0400 Subject: [PATCH 180/989] gh-134100: Fix use-after-free in `PyImport_ImportModuleLevelObject` (#134117) --- .../import_/test_relative_imports.py | 15 +++++++++++++++ ...2025-05-16-17-25-52.gh-issue-134100.5-FbLK.rst | 2 ++ Python/import.c | 4 +++- 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-17-25-52.gh-issue-134100.5-FbLK.rst diff --git a/Lib/test/test_importlib/import_/test_relative_imports.py b/Lib/test/test_importlib/import_/test_relative_imports.py index e535d119763148..1549cbe96ce2d1 100644 --- a/Lib/test/test_importlib/import_/test_relative_imports.py +++ b/Lib/test/test_importlib/import_/test_relative_imports.py @@ -223,6 +223,21 @@ def test_relative_import_no_package_exists_absolute(self): self.__import__('sys', {'__package__': '', '__spec__': None}, level=1) + def test_malicious_relative_import(self): + # https://github.com/python/cpython/issues/134100 + # Test to make sure UAF bug with error msg doesn't come back to life + import sys + loooong = "".ljust(0x23000, "b") + name = f"a.{loooong}.c" + + with util.uncache(name): + sys.modules[name] = {} + with self.assertRaisesRegex( + KeyError, + r"'a\.b+' not in sys\.modules as expected" + ): + __import__(f"{loooong}.c", {"__package__": "a"}, level=1) + (Frozen_RelativeImports, Source_RelativeImports diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-17-25-52.gh-issue-134100.5-FbLK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-17-25-52.gh-issue-134100.5-FbLK.rst new file mode 100644 index 00000000000000..d672347f9ad246 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-17-25-52.gh-issue-134100.5-FbLK.rst @@ -0,0 +1,2 @@ +Fix a use-after-free bug that occurs when an imported module isn't +in :data:`sys.modules` after its initial import. Patch by Nico-Posada. diff --git a/Python/import.c b/Python/import.c index 9dec0f488a3b90..e7be1b90751a6c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3854,15 +3854,17 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, } final_mod = import_get_module(tstate, to_return); - Py_DECREF(to_return); if (final_mod == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_Format(tstate, PyExc_KeyError, "%R not in sys.modules as expected", to_return); } + Py_DECREF(to_return); goto error; } + + Py_DECREF(to_return); } } else { From 22e4a40d9089dde2f1578c4b320f27b20e2e125b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 18 May 2025 10:10:54 +0200 Subject: [PATCH 181/989] gh-134082: modernize `string.Formatter` class docstring (#134125) fixup Formatter class docstring --- Lib/string/__init__.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/Lib/string/__init__.py b/Lib/string/__init__.py index b7782e042b94eb..b788d7136f1ae3 100644 --- a/Lib/string/__init__.py +++ b/Lib/string/__init__.py @@ -191,16 +191,14 @@ def get_identifiers(self): ######################################################################## -# the Formatter class -# see PEP 3101 for details and purpose of this class - -# The hard parts are reused from the C implementation. They're exposed as "_" -# prefixed methods of str. - +# The Formatter class (PEP 3101). +# # The overall parser is implemented in _string.formatter_parser. -# The field name parser is implemented in _string.formatter_field_name_split +# The field name parser is implemented in _string.formatter_field_name_split. class Formatter: + """See PEP 3101 for details and purpose of this class.""" + def format(self, format_string, /, *args, **kwargs): return self.vformat(format_string, args, kwargs) @@ -293,7 +291,6 @@ def parse(self, format_string): Return an iterable that contains tuples of the form (literal_text, field_name, format_spec, conversion). - *field_name* can be None, in which case there's no object to format and output; otherwise, it is looked up and formatted with *format_spec* and *conversion*. From 0a160bf14c4848f50539e52e2de486c641d122a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 18 May 2025 10:16:10 +0200 Subject: [PATCH 182/989] gh-133157: remove usage of `_Py_NO_SANITIZE_UNDEFINED` in `faulthandler` (#134047) In `faulthandler_sigfpe()`, instead of using 1/0 arithmetic, we explicitly raise SIGFPE. We also remove `faulthandler._read_null()` since reading from NULL is an undefined behavior and `faulthandler` should not check for low-level C undefined behaviors. --- Lib/test/test_faulthandler.py | 23 ----------------------- Modules/faulthandler.c | 31 ++----------------------------- 2 files changed, 2 insertions(+), 52 deletions(-) diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py index 371c63adce9412..2fb963f52e5138 100644 --- a/Lib/test/test_faulthandler.py +++ b/Lib/test/test_faulthandler.py @@ -166,29 +166,6 @@ def check_windows_exception(self, code, line_number, name_regex, **kw): fatal_error = 'Windows fatal exception: %s' % name_regex self.check_error(code, line_number, fatal_error, **kw) - @unittest.skipIf(sys.platform.startswith('aix'), - "the first page of memory is a mapped read-only on AIX") - def test_read_null(self): - if not MS_WINDOWS: - self.check_fatal_error(""" - import faulthandler - faulthandler.enable() - faulthandler._read_null() - """, - 3, - # Issue #12700: Read NULL raises SIGILL on Mac OS X Lion - '(?:Segmentation fault' - '|Bus error' - '|Illegal instruction)') - else: - self.check_windows_exception(""" - import faulthandler - faulthandler.enable() - faulthandler._read_null() - """, - 3, - 'access violation') - @skip_segfault_on_android def test_sigsegv(self): self.check_fatal_error(""" diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c index d49ce794d88674..c94f4f66366170 100644 --- a/Modules/faulthandler.c +++ b/Modules/faulthandler.c @@ -1069,18 +1069,6 @@ faulthandler_suppress_crash_report(void) #endif } -static PyObject* _Py_NO_SANITIZE_UNDEFINED -faulthandler_read_null(PyObject *self, PyObject *args) -{ - volatile int *x; - volatile int y; - - faulthandler_suppress_crash_report(); - x = NULL; - y = *x; - return PyLong_FromLong(y); - -} static void faulthandler_raise_sigsegv(void) @@ -1158,23 +1146,12 @@ faulthandler_fatal_error_c_thread(PyObject *self, PyObject *args) Py_RETURN_NONE; } -static PyObject* _Py_NO_SANITIZE_UNDEFINED +static PyObject* faulthandler_sigfpe(PyObject *self, PyObject *Py_UNUSED(dummy)) { faulthandler_suppress_crash_report(); - - /* Do an integer division by zero: raise a SIGFPE on Intel CPU, but not on - PowerPC. Use volatile to disable compile-time optimizations. */ - volatile int x = 1, y = 0, z; - z = x / y; - - /* If the division by zero didn't raise a SIGFPE (e.g. on PowerPC), - raise it manually. */ raise(SIGFPE); - - /* This line is never reached, but we pretend to make something with z - to silence a compiler warning. */ - return PyLong_FromLong(z); + Py_UNREACHABLE(); } static PyObject * @@ -1316,10 +1293,6 @@ static PyMethodDef module_methods[] = { "Unregister the handler of the signal " "'signum' registered by register().")}, #endif - {"_read_null", faulthandler_read_null, METH_NOARGS, - PyDoc_STR("_read_null($module, /)\n--\n\n" - "Read from NULL, raise " - "a SIGSEGV or SIGBUS signal depending on the platform.")}, {"_sigsegv", faulthandler_sigsegv, METH_VARARGS, PyDoc_STR("_sigsegv($module, release_gil=False, /)\n--\n\n" "Raise a SIGSEGV signal.")}, From f2de1e6861c27bd498f598efc01600450979b5f9 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 18 May 2025 11:02:29 -0400 Subject: [PATCH 183/989] gh-134144: Fix use-after-free in zapthreads() (#134145) --- Lib/test/test_interpreters/test_api.py | 8 +++++ ...-05-17-14-41-21.gh-issue-134144.xVpZik.rst | 1 + Modules/_testinternalcapi.c | 29 ++++++++++++++++--- Python/pystate.c | 9 ++++-- 4 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-05-17-14-41-21.gh-issue-134144.xVpZik.rst diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 66c7afce88f0d8..1e2d572b1cbb81 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1452,6 +1452,14 @@ def test_destroy(self): self.assertFalse( self.interp_exists(interpid)) + with self.subTest('basic C-API'): + interpid = _testinternalcapi.create_interpreter() + self.assertTrue( + self.interp_exists(interpid)) + _testinternalcapi.destroy_interpreter(interpid, basic=True) + self.assertFalse( + self.interp_exists(interpid)) + def test_get_config(self): # This test overlaps with # test.test_capi.test_misc.InterpreterConfigTests. diff --git a/Misc/NEWS.d/next/C_API/2025-05-17-14-41-21.gh-issue-134144.xVpZik.rst b/Misc/NEWS.d/next/C_API/2025-05-17-14-41-21.gh-issue-134144.xVpZik.rst new file mode 100644 index 00000000000000..11c7bd59a4dd1d --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-17-14-41-21.gh-issue-134144.xVpZik.rst @@ -0,0 +1 @@ +Fix crash when calling :c:func:`Py_EndInterpreter` with a :term:`thread state` that isn't the initial thread for the interpreter. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 92f744c5a5fc70..76bd76cc6b2490 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1690,11 +1690,12 @@ create_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) static PyObject * destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"id", NULL}; + static char *kwlist[] = {"id", "basic", NULL}; PyObject *idobj = NULL; + int basic = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O:destroy_interpreter", kwlist, - &idobj)) + "O|p:destroy_interpreter", kwlist, + &idobj, &basic)) { return NULL; } @@ -1704,7 +1705,27 @@ destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } - _PyXI_EndInterpreter(interp, NULL, NULL); + if (basic) + { + // Test the basic Py_EndInterpreter with weird out of order thread states + PyThreadState *t1, *t2; + PyThreadState *prev; + t1 = interp->threads.head; + if (t1 == NULL) { + t1 = PyThreadState_New(interp); + } + t2 = PyThreadState_New(interp); + prev = PyThreadState_Swap(t2); + PyThreadState_Clear(t1); + PyThreadState_Delete(t1); + Py_EndInterpreter(t2); + PyThreadState_Swap(prev); + } + else + { + // use the cross interpreter _PyXI_EndInterpreter normally + _PyXI_EndInterpreter(interp, NULL, NULL); + } Py_RETURN_NONE; } diff --git a/Python/pystate.c b/Python/pystate.c index 1ac134400856d4..14ae2748b0bc99 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1908,9 +1908,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) static void zapthreads(PyInterpreterState *interp) { + PyThreadState *tstate; /* No need to lock the mutex here because this should only happen - when the threads are all really dead (XXX famous last words). */ - _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + when the threads are all really dead (XXX famous last words). + + Cannot use _Py_FOR_EACH_TSTATE_UNLOCKED because we are freeing + the thread states here. + */ + while ((tstate = interp->threads.head) != NULL) { tstate_verify_not_active(tstate); tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); From 53da1e8c8ccbe3161ebc42e8b8b7ebd1ab70e05b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 18 May 2025 11:56:20 -0400 Subject: [PATCH 184/989] gh-134173: optimize state transfer between `concurrent.futures.Future` and `asyncio.Future` (#134174) Co-authored-by: Kumar Aditya --- Lib/asyncio/futures.py | 17 +++--- Lib/concurrent/futures/_base.py | 27 +++++++++ Lib/test/test_asyncio/test_futures.py | 58 +++++++++++++++++-- .../test_concurrent_futures/test_future.py | 57 ++++++++++++++++++ ...-05-18-07-25-15.gh-issue-134173.53oOoF.rst | 3 + 5 files changed, 148 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-07-25-15.gh-issue-134173.53oOoF.rst diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index d1df6707302277..6bd00a644789f1 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -351,22 +351,19 @@ def _set_concurrent_future_state(concurrent, source): def _copy_future_state(source, dest): """Internal helper to copy state from another Future. - The other Future may be a concurrent.futures.Future. + The other Future must be a concurrent.futures.Future. """ - assert source.done() if dest.cancelled(): return assert not dest.done() - if source.cancelled(): + done, cancelled, result, exception = source._get_snapshot() + assert done + if cancelled: dest.cancel() + elif exception is not None: + dest.set_exception(_convert_future_exc(exception)) else: - exception = source.exception() - if exception is not None: - dest.set_exception(_convert_future_exc(exception)) - else: - result = source.result() - dest.set_result(result) - + dest.set_result(result) def _chain_future(source, destination): """Chain two futures so that when one completes, so does the other. diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py index d98b1ebdd584b5..f506ce68aea5b2 100644 --- a/Lib/concurrent/futures/_base.py +++ b/Lib/concurrent/futures/_base.py @@ -558,6 +558,33 @@ def set_exception(self, exception): self._condition.notify_all() self._invoke_callbacks() + def _get_snapshot(self): + """Get a snapshot of the future's current state. + + This method atomically retrieves the state in one lock acquisition, + which is significantly faster than multiple method calls. + + Returns: + Tuple of (done, cancelled, result, exception) + - done: True if the future is done (cancelled or finished) + - cancelled: True if the future was cancelled + - result: The result if available and not cancelled + - exception: The exception if available and not cancelled + """ + # Fast path: check if already finished without lock + if self._state == FINISHED: + return True, False, self._result, self._exception + + # Need lock for other states since they can change + with self._condition: + # We have to check the state again after acquiring the lock + # because it may have changed in the meantime. + if self._state == FINISHED: + return True, False, self._result, self._exception + if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED}: + return True, True, None, None + return False, False, None, None + __class_getitem__ = classmethod(types.GenericAlias) class Executor(object): diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 8b51522278aaa6..39bef465bdb717 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -413,7 +413,7 @@ def func_repr(func): def test_copy_state(self): from asyncio.futures import _copy_future_state - f = self._new_future(loop=self.loop) + f = concurrent.futures.Future() f.set_result(10) newf = self._new_future(loop=self.loop) @@ -421,7 +421,7 @@ def test_copy_state(self): self.assertTrue(newf.done()) self.assertEqual(newf.result(), 10) - f_exception = self._new_future(loop=self.loop) + f_exception = concurrent.futures.Future() f_exception.set_exception(RuntimeError()) newf_exception = self._new_future(loop=self.loop) @@ -429,7 +429,7 @@ def test_copy_state(self): self.assertTrue(newf_exception.done()) self.assertRaises(RuntimeError, newf_exception.result) - f_cancelled = self._new_future(loop=self.loop) + f_cancelled = concurrent.futures.Future() f_cancelled.cancel() newf_cancelled = self._new_future(loop=self.loop) @@ -441,7 +441,7 @@ def test_copy_state(self): except BaseException as e: f_exc = e - f_conexc = self._new_future(loop=self.loop) + f_conexc = concurrent.futures.Future() f_conexc.set_exception(f_exc) newf_conexc = self._new_future(loop=self.loop) @@ -454,6 +454,56 @@ def test_copy_state(self): newf_tb = ''.join(traceback.format_tb(newf_exc.__traceback__)) self.assertEqual(newf_tb.count('raise concurrent.futures.InvalidStateError'), 1) + def test_copy_state_from_concurrent_futures(self): + """Test _copy_future_state from concurrent.futures.Future. + + This tests the optimized path using _get_snapshot when available. + """ + from asyncio.futures import _copy_future_state + + # Test with a result + f_concurrent = concurrent.futures.Future() + f_concurrent.set_result(42) + f_asyncio = self._new_future(loop=self.loop) + _copy_future_state(f_concurrent, f_asyncio) + self.assertTrue(f_asyncio.done()) + self.assertEqual(f_asyncio.result(), 42) + + # Test with an exception + f_concurrent_exc = concurrent.futures.Future() + f_concurrent_exc.set_exception(ValueError("test exception")) + f_asyncio_exc = self._new_future(loop=self.loop) + _copy_future_state(f_concurrent_exc, f_asyncio_exc) + self.assertTrue(f_asyncio_exc.done()) + with self.assertRaises(ValueError) as cm: + f_asyncio_exc.result() + self.assertEqual(str(cm.exception), "test exception") + + # Test with cancelled state + f_concurrent_cancelled = concurrent.futures.Future() + f_concurrent_cancelled.cancel() + f_asyncio_cancelled = self._new_future(loop=self.loop) + _copy_future_state(f_concurrent_cancelled, f_asyncio_cancelled) + self.assertTrue(f_asyncio_cancelled.cancelled()) + + # Test that destination already cancelled prevents copy + f_concurrent_result = concurrent.futures.Future() + f_concurrent_result.set_result(10) + f_asyncio_precancelled = self._new_future(loop=self.loop) + f_asyncio_precancelled.cancel() + _copy_future_state(f_concurrent_result, f_asyncio_precancelled) + self.assertTrue(f_asyncio_precancelled.cancelled()) + + # Test exception type conversion + f_concurrent_invalid = concurrent.futures.Future() + f_concurrent_invalid.set_exception(concurrent.futures.InvalidStateError("invalid")) + f_asyncio_invalid = self._new_future(loop=self.loop) + _copy_future_state(f_concurrent_invalid, f_asyncio_invalid) + self.assertTrue(f_asyncio_invalid.done()) + with self.assertRaises(asyncio.exceptions.InvalidStateError) as cm: + f_asyncio_invalid.result() + self.assertEqual(str(cm.exception), "invalid") + def test_iter(self): fut = self._new_future(loop=self.loop) diff --git a/Lib/test/test_concurrent_futures/test_future.py b/Lib/test/test_concurrent_futures/test_future.py index 4066ea1ee4b367..06b11a3bacf13a 100644 --- a/Lib/test/test_concurrent_futures/test_future.py +++ b/Lib/test/test_concurrent_futures/test_future.py @@ -6,6 +6,7 @@ PENDING, RUNNING, CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED, Future) from test import support +from test.support import threading_helper from .util import ( PENDING_FUTURE, RUNNING_FUTURE, CANCELLED_FUTURE, @@ -282,6 +283,62 @@ def test_multiple_set_exception(self): self.assertEqual(f.exception(), e) + def test_get_snapshot(self): + """Test the _get_snapshot method for atomic state retrieval.""" + # Test with a pending future + f = Future() + done, cancelled, result, exception = f._get_snapshot() + self.assertFalse(done) + self.assertFalse(cancelled) + self.assertIsNone(result) + self.assertIsNone(exception) + + # Test with a finished future (successful result) + f = Future() + f.set_result(42) + done, cancelled, result, exception = f._get_snapshot() + self.assertTrue(done) + self.assertFalse(cancelled) + self.assertEqual(result, 42) + self.assertIsNone(exception) + + # Test with a finished future (exception) + f = Future() + exc = ValueError("test error") + f.set_exception(exc) + done, cancelled, result, exception = f._get_snapshot() + self.assertTrue(done) + self.assertFalse(cancelled) + self.assertIsNone(result) + self.assertIs(exception, exc) + + # Test with a cancelled future + f = Future() + f.cancel() + done, cancelled, result, exception = f._get_snapshot() + self.assertTrue(done) + self.assertTrue(cancelled) + self.assertIsNone(result) + self.assertIsNone(exception) + + # Test concurrent access (basic thread safety check) + f = Future() + f.set_result(100) + results = [] + + def get_snapshot(): + for _ in range(1000): + snapshot = f._get_snapshot() + results.append(snapshot) + + threads = [threading.Thread(target=get_snapshot) for _ in range(4)] + with threading_helper.start_threads(threads): + pass + # All snapshots should be identical for a finished future + expected = (True, False, 100, None) + for result in results: + self.assertEqual(result, expected) + def setUpModule(): setup_module() diff --git a/Misc/NEWS.d/next/Library/2025-05-18-07-25-15.gh-issue-134173.53oOoF.rst b/Misc/NEWS.d/next/Library/2025-05-18-07-25-15.gh-issue-134173.53oOoF.rst new file mode 100644 index 00000000000000..57fba5e21a3e9a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-07-25-15.gh-issue-134173.53oOoF.rst @@ -0,0 +1,3 @@ +Speed up :mod:`asyncio` performance of transferring state from thread +pool :class:`concurrent.futures.Future` by up to 4.4x. Patch by J. Nick +Koston. From b1c33294ca5ef8d889f2ec87f9a8fa79741f9f7d Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 18 May 2025 16:59:20 +0100 Subject: [PATCH 185/989] gh-134114: Clarify FAQ note about dictonary keys (#134118) --- Doc/faq/design.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Doc/faq/design.rst b/Doc/faq/design.rst index e2710fab9cf800..c758c019ca437b 100644 --- a/Doc/faq/design.rst +++ b/Doc/faq/design.rst @@ -420,10 +420,12 @@ strings representing the files in the current directory. Functions which operate on this output would generally not break if you added another file or two to the directory. -Tuples are immutable, meaning that once a tuple has been created, you can't -replace any of its elements with a new value. Lists are mutable, meaning that -you can always change a list's elements. Only immutable elements can be used as -dictionary keys, and hence only tuples and not lists can be used as keys. +Tuples are :term:`immutable`, meaning that once a tuple has been created, you can't +replace any of its elements with a new value. Lists are :term:`mutable`, meaning that +you can always change a list's elements. Only :term:`hashable` objects can +be used as dictionary keys. Most immutable types are hashable, which is why +tuples, but not lists, can be used as keys. Note, however, that a tuple is +only hashable if all of its elements are hashable. How are lists implemented in CPython? From 4ce91871a9077fcf8806d6fd79bdb32af9587306 Mon Sep 17 00:00:00 2001 From: Yongzi Li <1538321957@qq.com> Date: Mon, 19 May 2025 00:21:02 +0800 Subject: [PATCH 186/989] fix indent in `controlflow.rst` docs (#134008) --- Doc/tutorial/controlflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 9a810f16388b9d..5c0e8f34bf82f4 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -1056,7 +1056,7 @@ Here is an example of a multi-line docstring:: >>> print(my_function.__doc__) Do nothing, but document it. - No, really, it doesn't do anything. + No, really, it doesn't do anything. .. _tut-annotations: From a1e2e6ca91fb7ce13af67254c55f02aab9c95679 Mon Sep 17 00:00:00 2001 From: Dave Jagoda Date: Sun, 18 May 2025 09:26:17 -0700 Subject: [PATCH 187/989] Fix example in Doc/howto/functional.rst (#133978) --- Doc/howto/functional.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 1f0608fb0fc53f..b4f3463afee812 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -372,8 +372,8 @@ have the form:: for expr2 in sequence2 if condition2 for expr3 in sequence3 - ... if condition3 + ... for exprN in sequenceN if conditionN ) From 2cc99b3dd3db5e9433e839a876d3a8041221cec5 Mon Sep 17 00:00:00 2001 From: Nybblista <170842536+nybblista@users.noreply.github.com> Date: Sun, 18 May 2025 19:26:58 +0300 Subject: [PATCH 188/989] Docs: Fix the `_PyGenObject_HEAD` reference in the `InternalDocs/generators.md` (#133739) --- InternalDocs/generators.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md index 87fbb91236844b..979a5b51521e4e 100644 --- a/InternalDocs/generators.md +++ b/InternalDocs/generators.md @@ -28,9 +28,9 @@ interpreter state. The `frame` of a generator is embedded in the generator object struct as a [`_PyInterpreterFrame`](frames.md) (see `_PyGenObject_HEAD` in -[`pycore_genobject.h`](../Include/internal/pycore_genobject.h)). +[`pycore_interpframe_structs.h`](../Include/internal/pycore_interpframe_structs.h)). This means that we can get the frame from the generator or the generator -from the frame (see `_PyGen_GetGeneratorFromFrame` in the same file). +from the frame (see `_PyGen_GetGeneratorFromFrame` in [`pycore_genobject.h`](../Include/internal/pycore_genobject.h)). Other fields of the generator struct include metadata (such as the name of the generator function) and runtime state information (such as whether its frame is executing, suspended, cleared, etc.). From bb32f3c698f5192dddd2d2f33c7c0a3d05afc223 Mon Sep 17 00:00:00 2001 From: da-woods Date: Sun, 18 May 2025 17:28:43 +0100 Subject: [PATCH 189/989] document `Py_VISIT` as a macro in the docs (#133688) --- Doc/c-api/gcsupport.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index d1f0982b818931..3e23605778f05a 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -180,9 +180,9 @@ provided. In order to use this macro, the :c:member:`~PyTypeObject.tp_traverse` must name its arguments exactly *visit* and *arg*: -.. c:function:: void Py_VISIT(PyObject *o) +.. c:macro:: Py_VISIT(o) - If *o* is not ``NULL``, call the *visit* callback, with arguments *o* + If the :c:expr:`PyObject *` *o* is not ``NULL``, call the *visit* callback, with arguments *o* and *arg*. If *visit* returns a non-zero value, then return it. Using this macro, :c:member:`~PyTypeObject.tp_traverse` handlers look like:: From 5cbc8c632e860941602e8f7da9aab52fae40aca6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 May 2025 21:09:51 +0300 Subject: [PATCH 190/989] gh-133889: Only show the path of the URL in the SimpleHTTPRequestHandler page (GH-134135) The query and fragment are ambiguous and not used. --- Lib/http/server.py | 7 +++++-- Lib/test/test_httpservers.py | 13 +++++++------ .../2025-05-17-12-40-12.gh-issue-133889.Eh-zO4.rst | 3 +++ 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-17-12-40-12.gh-issue-133889.Eh-zO4.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index f42e9a375e479a..abf9f87a1fc711 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -818,11 +818,14 @@ def list_directory(self, path): return None list.sort(key=lambda a: a.lower()) r = [] + displaypath = self.path + displaypath = displaypath.split('#', 1)[0] + displaypath = displaypath.split('?', 1)[0] try: - displaypath = urllib.parse.unquote(self.path, + displaypath = urllib.parse.unquote(displaypath, errors='surrogatepass') except UnicodeDecodeError: - displaypath = urllib.parse.unquote(self.path) + displaypath = urllib.parse.unquote(displaypath) displaypath = html.escape(displaypath, quote=False) enc = sys.getfilesystemencoding() title = f'Directory listing for {displaypath}' diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 11c74a02bf2903..df5d2a7bedc4b2 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -627,13 +627,14 @@ def test_list_dir_escape_filename(self): self.check_list_dir_filename(filename) os_helper.unlink(os.path.join(self.tempdir, filename)) - def test_undecodable_parameter(self): - # sanity check using a valid parameter + def test_list_dir_with_query_and_fragment(self): + prefix = f'listing for {self.base_url}/', response) + self.assertIn(prefix + b'h1>', response) response = self.request(self.base_url + '/?x=123').read() - self.assertRegex(response, rf'listing for {self.base_url}/\?x=123'.encode('latin1')) - # now the bogus encoding - response = self.request(self.base_url + '/?x=%bb').read() - self.assertRegex(response, rf'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1')) + self.assertIn(prefix + b'title>', response) + self.assertIn(prefix + b'h1>', response) def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. diff --git a/Misc/NEWS.d/next/Library/2025-05-17-12-40-12.gh-issue-133889.Eh-zO4.rst b/Misc/NEWS.d/next/Library/2025-05-17-12-40-12.gh-issue-133889.Eh-zO4.rst new file mode 100644 index 00000000000000..58b213e29f98d8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-17-12-40-12.gh-issue-133889.Eh-zO4.rst @@ -0,0 +1,3 @@ +The generated directory listing page in +:class:`http.server.SimpleHTTPRequestHandler` now only shows the decoded +path component of the requested URL, and not the query and fragment. From 9983c7d4416cac8deb2fded1ec9c7daf786c3a02 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 May 2025 22:21:06 +0300 Subject: [PATCH 191/989] gh-133890: Handle UnicodeEncodeError in tarfile (GH-134147) UnicodeEncodeError is now handled the same way as OSError during TarFile member extraction. --- Lib/tarfile.py | 4 +- Lib/test/test_tarfile.py | 49 +++++++++++++++++-- ...-05-17-18-08-35.gh-issue-133890.onn9_X.rst | 2 + 3 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-17-18-08-35.gh-issue-133890.onn9_X.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 13889d768021b1..212b71f6509740 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2439,7 +2439,7 @@ def _get_extract_tarinfo(self, member, filter_function, path): unfiltered = tarinfo try: tarinfo = filter_function(tarinfo, path) - except (OSError, FilterError) as e: + except (OSError, UnicodeEncodeError, FilterError) as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) @@ -2460,7 +2460,7 @@ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): self._extract_member(tarinfo, os.path.join(path, tarinfo.name), set_attrs=set_attrs, numeric_owner=numeric_owner) - except OSError as e: + except (OSError, UnicodeEncodeError) as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2d9649237a9382..2018a20afd1b18 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3490,11 +3490,12 @@ class ArchiveMaker: with t.open() as tar: ... # `tar` is now a TarFile with 'filename' in it! """ - def __init__(self): + def __init__(self, **kwargs): self.bio = io.BytesIO() + self.tar_kwargs = dict(kwargs) def __enter__(self): - self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio) + self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio, **self.tar_kwargs) return self def __exit__(self, *exc): @@ -4073,7 +4074,10 @@ def test_tar_filter(self): # that in the test archive.) with tarfile.TarFile.open(tarname) as tar: for tarinfo in tar.getmembers(): - filtered = tarfile.tar_filter(tarinfo, '') + try: + filtered = tarfile.tar_filter(tarinfo, '') + except UnicodeEncodeError: + continue self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) @@ -4084,11 +4088,48 @@ def test_data_filter(self): for tarinfo in tar.getmembers(): try: filtered = tarfile.data_filter(tarinfo, '') - except tarfile.FilterError: + except (tarfile.FilterError, UnicodeEncodeError): continue self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) + @unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths') + def test_filter_unencodable(self): + # Sanity check using a valid path. + tarinfo = tarfile.TarInfo(os_helper.TESTFN) + filtered = tarfile.tar_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) + filtered = tarfile.data_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) + + tarinfo = tarfile.TarInfo('test\x00') + self.assertRaises(ValueError, tarfile.tar_filter, tarinfo, '') + self.assertRaises(ValueError, tarfile.data_filter, tarinfo, '') + tarinfo = tarfile.TarInfo('\ud800') + self.assertRaises(UnicodeEncodeError, tarfile.tar_filter, tarinfo, '') + self.assertRaises(UnicodeEncodeError, tarfile.data_filter, tarinfo, '') + + @unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths') + def test_extract_unencodable(self): + # Create a member with name \xed\xa0\x80 which is UTF-8 encoded + # lone surrogate \ud800. + with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc: + arc.add('\udced\udca0\udc80') + with os_helper.temp_cwd() as tmp: + tar = arc.open(encoding='utf-8', errors='surrogatepass', + errorlevel=1) + self.assertEqual(tar.getnames(), ['\ud800']) + with self.assertRaises(UnicodeEncodeError): + tar.extractall() + self.assertEqual(os.listdir(), []) + + tar = arc.open(encoding='utf-8', errors='surrogatepass', + errorlevel=0, debug=1) + with support.captured_stderr() as stderr: + tar.extractall() + self.assertEqual(os.listdir(), []) + self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue()) + def test_change_default_filter_on_instance(self): tar = tarfile.TarFile(tarname, 'r') def strict_filter(tarinfo, path): diff --git a/Misc/NEWS.d/next/Library/2025-05-17-18-08-35.gh-issue-133890.onn9_X.rst b/Misc/NEWS.d/next/Library/2025-05-17-18-08-35.gh-issue-133890.onn9_X.rst new file mode 100644 index 00000000000000..44565a5424e65b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-17-18-08-35.gh-issue-133890.onn9_X.rst @@ -0,0 +1,2 @@ +The :mod:`tarfile` module now handles :exc:`UnicodeEncodeError` in the same +way as :exc:`OSError` when cannot extract a member. From d6dc33ed8086fbd79f6adaeac4e329f29a13f834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 19 May 2025 11:26:14 +0200 Subject: [PATCH 192/989] gh-134087: enforce signature of `threading.RLock` (#134178) - Reject positional and keyword arguments in `_thread.RLock.__new__`. - Convert `_thread.lock.__new__` to AC. --- Doc/whatsnew/3.15.rst | 9 ++ Lib/test/lock_tests.py | 5 ++ Lib/test/test_threading.py | 6 +- Lib/threading.py | 13 +-- ...-05-18-12-23-07.gh-issue-134087.HilZWl.rst | 3 + Modules/_threadmodule.c | 82 ++++++++++--------- Modules/clinic/_threadmodule.c.h | 50 ++++++++++- 7 files changed, 115 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-12-23-07.gh-issue-134087.HilZWl.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 987cf944972329..bf186c191b04d1 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -145,6 +145,15 @@ sysconfig (Contributed by Filipe Laíns in :gh:`92897`.) +threading +--------- + +* Remove support for arbitrary positional or keyword arguments in the C + implementation of :class:`~threading.RLock` objects. This was deprecated + in Python 3.14. + (Contributed by Bénédikt Tran in :gh:`134087`.) + + typing ------ diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py index 009e04e9c0b522..d64b2b9fe28694 100644 --- a/Lib/test/lock_tests.py +++ b/Lib/test/lock_tests.py @@ -124,6 +124,11 @@ def test_constructor(self): lock = self.locktype() del lock + def test_constructor_noargs(self): + self.assertRaises(TypeError, self.locktype, 1) + self.assertRaises(TypeError, self.locktype, x=1) + self.assertRaises(TypeError, self.locktype, 1, x=2) + def test_repr(self): lock = self.locktype() self.assertRegex(repr(lock), "") diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index abe63c10c0ac7c..b6e2d419019aa1 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2137,8 +2137,7 @@ def test_signature(self): # gh-102029 ] for args, kwargs in arg_types: with self.subTest(args=args, kwargs=kwargs): - with self.assertWarns(DeprecationWarning): - threading.RLock(*args, **kwargs) + self.assertRaises(TypeError, threading.RLock, *args, **kwargs) # Subtypes with custom `__init__` are allowed (but, not recommended): class CustomRLock(self.locktype): @@ -2156,6 +2155,9 @@ class ConditionAsRLockTests(lock_tests.RLockTests): # Condition uses an RLock by default and exports its API. locktype = staticmethod(threading.Condition) + def test_constructor_noargs(self): + self.skipTest("Condition allows positional arguments") + def test_recursion_count(self): self.skipTest("Condition does not expose _recursion_count()") diff --git a/Lib/threading.py b/Lib/threading.py index 39a1a7f4cdfda0..9feada3b8bb15b 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -123,7 +123,7 @@ def gettrace(): Lock = _LockType -def RLock(*args, **kwargs): +def RLock(): """Factory function that returns a new reentrant lock. A reentrant lock must be released by the thread that acquired it. Once a @@ -132,16 +132,9 @@ def RLock(*args, **kwargs): acquired it. """ - if args or kwargs: - import warnings - warnings.warn( - 'Passing arguments to RLock is deprecated and will be removed in 3.15', - DeprecationWarning, - stacklevel=2, - ) if _CRLock is None: - return _PyRLock(*args, **kwargs) - return _CRLock(*args, **kwargs) + return _PyRLock() + return _CRLock() class _RLock: """This class implements reentrant lock objects. diff --git a/Misc/NEWS.d/next/Library/2025-05-18-12-23-07.gh-issue-134087.HilZWl.rst b/Misc/NEWS.d/next/Library/2025-05-18-12-23-07.gh-issue-134087.HilZWl.rst new file mode 100644 index 00000000000000..c4a05965f73ece --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-12-23-07.gh-issue-134087.HilZWl.rst @@ -0,0 +1,3 @@ +Remove support for arbitrary positional or keyword arguments in the C +implementation of :class:`threading.RLock` objects. This was deprecated +since Python 3.14. Patch by Bénédikt Tran. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 9776a32755db68..77286ed2a74669 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -19,8 +19,6 @@ # include // SIGINT #endif -#include "clinic/_threadmodule.c.h" - // ThreadError is just an alias to PyExc_RuntimeError #define ThreadError PyExc_RuntimeError @@ -31,6 +29,7 @@ static struct PyModuleDef thread_module; typedef struct { PyTypeObject *excepthook_type; PyTypeObject *lock_type; + PyTypeObject *rlock_type; PyTypeObject *local_type; PyTypeObject *local_dummy_type; PyTypeObject *thread_handle_type; @@ -48,6 +47,17 @@ get_thread_state(PyObject *module) return (thread_module_state *)state; } +static inline thread_module_state* +get_thread_state_by_cls(PyTypeObject *cls) +{ + // Use PyType_GetModuleByDef() to handle (R)Lock subclasses. + PyObject *module = PyType_GetModuleByDef(cls, &thread_module); + if (module == NULL) { + return NULL; + } + return get_thread_state(module); +} + #ifdef MS_WINDOWS typedef HRESULT (WINAPI *PF_GET_THREAD_DESCRIPTION)(HANDLE, PCWSTR*); @@ -59,9 +69,14 @@ static PF_SET_THREAD_DESCRIPTION pSetThreadDescription = NULL; /*[clinic input] module _thread +class _thread.lock "lockobject *" "clinic_state()->lock_type" +class _thread.RLock "rlockobject *" "clinic_state()->rlock_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=be8dbe5cc4b16df7]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c5a0f8c492a0c263]*/ +#define clinic_state() get_thread_state_by_cls(type) +#include "clinic/_threadmodule.c.h" +#undef clinic_state // _ThreadHandle type @@ -916,25 +931,21 @@ lock__at_fork_reinit(PyObject *op, PyObject *Py_UNUSED(dummy)) } #endif /* HAVE_FORK */ -static lockobject *newlockobject(PyObject *module); +/*[clinic input] +@classmethod +_thread.lock.__new__ as lock_new +[clinic start generated code]*/ static PyObject * -lock_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +lock_new_impl(PyTypeObject *type) +/*[clinic end generated code: output=eab660d5a4c05c8a input=260208a4e277d250]*/ { - // convert to AC? - if (!_PyArg_NoKeywords("lock", kwargs)) { - goto error; - } - if (!_PyArg_CheckPositional("lock", PyTuple_GET_SIZE(args), 0, 0)) { - goto error; + lockobject *self = (lockobject *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; } - - PyObject *module = PyType_GetModuleByDef(type, &thread_module); - assert(module != NULL); - return (PyObject *)newlockobject(module); - -error: - return NULL; + self->lock = (PyMutex){0}; + return (PyObject *)self; } @@ -1186,8 +1197,14 @@ PyDoc_STRVAR(rlock_is_owned_doc, \n\ For internal use by `threading.Condition`."); +/*[clinic input] +@classmethod +_thread.RLock.__new__ as rlock_new +[clinic start generated code]*/ + static PyObject * -rlock_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +rlock_new_impl(PyTypeObject *type) +/*[clinic end generated code: output=bb4fb1edf6818df5 input=013591361bf1ac6e]*/ { rlockobject *self = (rlockobject *) type->tp_alloc(type, 0); if (self == NULL) { @@ -1267,20 +1284,6 @@ static PyType_Spec rlock_type_spec = { .slots = rlock_type_slots, }; -static lockobject * -newlockobject(PyObject *module) -{ - thread_module_state *state = get_thread_state(module); - - PyTypeObject *type = state->lock_type; - lockobject *self = (lockobject *)type->tp_alloc(type, 0); - if (self == NULL) { - return NULL; - } - self->lock = (PyMutex){0}; - return self; -} - /* Thread-local objects */ /* Quick overview: @@ -2035,7 +2038,8 @@ Note: the default signal handler for SIGINT raises ``KeyboardInterrupt``." static PyObject * thread_PyThread_allocate_lock(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return (PyObject *) newlockobject(module); + thread_module_state *state = get_thread_state(module); + return lock_new_impl(state->lock_type); } PyDoc_STRVAR(allocate_lock_doc, @@ -2645,15 +2649,13 @@ thread_module_exec(PyObject *module) } // RLock - PyTypeObject *rlock_type = (PyTypeObject *)PyType_FromSpec(&rlock_type_spec); - if (rlock_type == NULL) { + state->rlock_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &rlock_type_spec, NULL); + if (state->rlock_type == NULL) { return -1; } - if (PyModule_AddType(module, rlock_type) < 0) { - Py_DECREF(rlock_type); + if (PyModule_AddType(module, state->rlock_type) < 0) { return -1; } - Py_DECREF(rlock_type); // Local dummy state->local_dummy_type = (PyTypeObject *)PyType_FromSpec(&local_dummy_type_spec); @@ -2740,6 +2742,7 @@ thread_module_traverse(PyObject *module, visitproc visit, void *arg) thread_module_state *state = get_thread_state(module); Py_VISIT(state->excepthook_type); Py_VISIT(state->lock_type); + Py_VISIT(state->rlock_type); Py_VISIT(state->local_type); Py_VISIT(state->local_dummy_type); Py_VISIT(state->thread_handle_type); @@ -2752,6 +2755,7 @@ thread_module_clear(PyObject *module) thread_module_state *state = get_thread_state(module); Py_CLEAR(state->excepthook_type); Py_CLEAR(state->lock_type); + Py_CLEAR(state->rlock_type); Py_CLEAR(state->local_type); Py_CLEAR(state->local_dummy_type); Py_CLEAR(state->thread_handle_type); diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h index 8930e54170caf4..fd8e32a2261d38 100644 --- a/Modules/clinic/_threadmodule.c.h +++ b/Modules/clinic/_threadmodule.c.h @@ -6,7 +6,53 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +#include "pycore_modsupport.h" // _PyArg_NoKeywords() + +static PyObject * +lock_new_impl(PyTypeObject *type); + +static PyObject * +lock_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + PyTypeObject *base_tp = clinic_state()->lock_type; + + if ((type == base_tp || type->tp_init == base_tp->tp_init) && + !_PyArg_NoPositional("lock", args)) { + goto exit; + } + if ((type == base_tp || type->tp_init == base_tp->tp_init) && + !_PyArg_NoKeywords("lock", kwargs)) { + goto exit; + } + return_value = lock_new_impl(type); + +exit: + return return_value; +} + +static PyObject * +rlock_new_impl(PyTypeObject *type); + +static PyObject * +rlock_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + PyTypeObject *base_tp = clinic_state()->rlock_type; + + if ((type == base_tp || type->tp_init == base_tp->tp_init) && + !_PyArg_NoPositional("RLock", args)) { + goto exit; + } + if ((type == base_tp || type->tp_init == base_tp->tp_init) && + !_PyArg_NoKeywords("RLock", kwargs)) { + goto exit; + } + return_value = rlock_new_impl(type); + +exit: + return return_value; +} #if (defined(HAVE_PTHREAD_GETNAME_NP) || defined(HAVE_PTHREAD_GET_NAME_NP) || defined(MS_WINDOWS)) @@ -103,4 +149,4 @@ _thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb #ifndef _THREAD_SET_NAME_METHODDEF #define _THREAD_SET_NAME_METHODDEF #endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ -/*[clinic end generated code: output=e978dc4615b9bc35 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b381ec5e313198e7 input=a9049054013a1b77]*/ From d55e11b8049e3abf3cc187b4958224b225a39897 Mon Sep 17 00:00:00 2001 From: BecoKo Date: Mon, 19 May 2025 12:33:15 +0300 Subject: [PATCH 193/989] gh-76023: Make os.path.realpath to ignore WinError 1005 in non-strict mode (GH-128328) --- Lib/ntpath.py | 3 ++- .../next/Windows/2025-05-19-03-02-04.gh-issue-76023.vHOf6M.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Windows/2025-05-19-03-02-04.gh-issue-76023.vHOf6M.rst diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 5481bb8888ef59..52ff2af743af6c 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -661,9 +661,10 @@ def _getfinalpathname_nonstrict(path): # 87: ERROR_INVALID_PARAMETER # 123: ERROR_INVALID_NAME # 161: ERROR_BAD_PATHNAME + # 1005: ERROR_UNRECOGNIZED_VOLUME # 1920: ERROR_CANT_ACCESS_FILE # 1921: ERROR_CANT_RESOLVE_FILENAME (implies unfollowable symlink) - allowed_winerror = 1, 2, 3, 5, 21, 32, 50, 53, 65, 67, 87, 123, 161, 1920, 1921 + allowed_winerror = 1, 2, 3, 5, 21, 32, 50, 53, 65, 67, 87, 123, 161, 1005, 1920, 1921 # Non-strict algorithm is to find as much of the target directory # as we can and join the rest. diff --git a/Misc/NEWS.d/next/Windows/2025-05-19-03-02-04.gh-issue-76023.vHOf6M.rst b/Misc/NEWS.d/next/Windows/2025-05-19-03-02-04.gh-issue-76023.vHOf6M.rst new file mode 100644 index 00000000000000..958f4f4a4408f7 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2025-05-19-03-02-04.gh-issue-76023.vHOf6M.rst @@ -0,0 +1 @@ +Make :func:`os.path.realpath` ignore Windows error 1005 when in non-strict mode. From 986c3670285670558b6e6f77ff1507dfcc2c99fa Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 19 May 2025 11:35:22 +0100 Subject: [PATCH 194/989] gh-133779: Revert Windows generation of pyconfig.h and go back to a static header. (GH-133966) Extension builders must specify Py_GIL_DISABLED if they want to link to the free-threaded builds. This was usually the case already, but this change guarantees it in all circumstances. --- Doc/howto/free-threading-extensions.rst | 8 +++++ Doc/whatsnew/3.14.rst | 6 ++++ Include/Python.h | 13 ++++++++ Lib/sysconfig/__init__.py | 2 +- Lib/test/test_sysconfig.py | 5 +-- ...-05-13-13-25-27.gh-issue-133779.-YcTBz.rst | 6 ++++ PC/{pyconfig.h.in => pyconfig.h} | 17 ++++++---- PCbuild/_freeze_module.vcxproj | 27 +--------------- PCbuild/pyproject.props | 8 ++--- PCbuild/pythoncore.vcxproj | 31 ++----------------- PCbuild/regen.targets | 14 ++++----- Tools/msi/dev/dev_files.wxs | 2 +- Tools/peg_generator/pegen/build.py | 2 ++ 13 files changed, 63 insertions(+), 78 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2025-05-13-13-25-27.gh-issue-133779.-YcTBz.rst rename PC/{pyconfig.h.in => pyconfig.h} (97%) diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 3f6ee517050bd8..5a3970f15d52d6 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -23,6 +23,14 @@ You can use it to enable code that only runs under the free-threaded build:: /* code that only runs in the free-threaded build */ #endif +.. note:: + + On Windows, this macro is not defined automatically, but must be specified + to the compiler when building. The :func:`sysconfig.get_config_var` function + can be used to determine whether the current running interpreter had the + macro defined. + + Module Initialization ===================== diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 11361289874c9d..8f39b99e38e3a5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -829,6 +829,12 @@ Kumar Aditya, Edgar Margffoy, and many others. Some of these contributors are employed by Meta, which has continued to provide significant engineering resources to support this project. +From 3.14, when compiling extension modules for the free-threaded build of +CPython on Windows, the preprocessor variable ``Py_GIL_DISABLED`` now needs to +be specified by the build backend, as it will no longer be determined +automatically by the C compiler. For a running interpreter, the setting that +was used at compile time can be found using :func:`sysconfig.get_config_var`. + .. _whatsnew314-pyrepl-highlighting: diff --git a/Include/Python.h b/Include/Python.h index 64be80145890a3..f34d581f0b4c91 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -59,6 +59,14 @@ # include // __readgsqword() #endif +// Suppress known warnings in Python header files. +#if defined(_MSC_VER) +// Warning that alignas behaviour has changed. Doesn't affect us, because we +// never relied on the old behaviour. +#pragma warning(push) +#pragma warning(disable: 5274) +#endif + // Include Python header files #include "pyport.h" #include "pymacro.h" @@ -138,4 +146,9 @@ #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" +// Restore warning filter +#ifdef _MSC_VER +#pragma warning(pop) +#endif + #endif /* !Py_PYTHON_H */ diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index 68890b45b82008..49e0986517ce97 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -457,7 +457,7 @@ def get_config_h_filename(): """Return the path of pyconfig.h.""" if _PYTHON_BUILD: if os.name == "nt": - inc_dir = os.path.dirname(sys._base_executable) + inc_dir = os.path.join(_PROJECT_BASE, 'PC') else: inc_dir = _PROJECT_BASE else: diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 53e55383bf9c72..963cf753ce6178 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -531,13 +531,10 @@ def test_srcdir(self): Python_h = os.path.join(srcdir, 'Include', 'Python.h') self.assertTrue(os.path.exists(Python_h), Python_h) # /PC/pyconfig.h.in always exists even if unused - pyconfig_h = os.path.join(srcdir, 'PC', 'pyconfig.h.in') - self.assertTrue(os.path.exists(pyconfig_h), pyconfig_h) pyconfig_h_in = os.path.join(srcdir, 'pyconfig.h.in') self.assertTrue(os.path.exists(pyconfig_h_in), pyconfig_h_in) if os.name == 'nt': - # /pyconfig.h exists on Windows in a build tree - pyconfig_h = os.path.join(sys.executable, '..', 'pyconfig.h') + pyconfig_h = os.path.join(srcdir, 'PC', 'pyconfig.h') self.assertTrue(os.path.exists(pyconfig_h), pyconfig_h) elif os.name == 'posix': makefile_dir = os.path.dirname(sysconfig.get_makefile_filename()) diff --git a/Misc/NEWS.d/next/Windows/2025-05-13-13-25-27.gh-issue-133779.-YcTBz.rst b/Misc/NEWS.d/next/Windows/2025-05-13-13-25-27.gh-issue-133779.-YcTBz.rst new file mode 100644 index 00000000000000..550600d5eebf11 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2025-05-13-13-25-27.gh-issue-133779.-YcTBz.rst @@ -0,0 +1,6 @@ +Reverts the change to generate different :file:`pyconfig.h` files based on +compiler settings, as it was frequently causing extension builds to break. +In particular, the ``Py_GIL_DISABLED`` preprocessor variable must now always +be defined explicitly when compiling for the experimental free-threaded +runtime. The :func:`sysconfig.get_config_var` function can be used to +determine whether the current runtime was compiled with that flag or not. diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h similarity index 97% rename from PC/pyconfig.h.in rename to PC/pyconfig.h index 1d659e7cee682b..0e8379387cd025 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h @@ -99,12 +99,17 @@ WIN32 is still required for the locale module. # define Py_DEBUG 1 #endif -/* Define to 1 if you want to disable the GIL */ -/* Uncomment the definition for free-threaded builds, or define it manually - * when compiling extension modules. Note that we test with #ifdef, so - * defining as 0 will still disable the GIL. */ -#ifndef Py_GIL_DISABLED -/* #define Py_GIL_DISABLED 1 */ +/* Define to 1 when compiling for experimental free-threaded builds */ +#ifdef Py_GIL_DISABLED +/* We undefine if it was set to zero because all later checks are #ifdef. + * Note that non-Windows builds do not do this, and so every effort should + * be made to avoid defining the variable at all when not desired. However, + * sysconfig.get_config_var always returns a 1 or a 0, and so it seems likely + * that a build backend will define it with the value. + */ +#if Py_GIL_DISABLED == 0 +#undef Py_GIL_DISABLED +#endif #endif /* Compiler specific defines */ diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 8c161835af9a8c..efff6a58d895cb 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -276,7 +276,7 @@ - + @@ -436,31 +436,6 @@ - - - - - - - - - - @(PyConfigH->'%(FullPath)', ';') - $([System.IO.File]::ReadAllText($(PyConfigH))) - $([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h')) - - - $(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1')) - - - - - diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 4e414dc913b9d5..7272542e13a5ca 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -10,10 +10,9 @@ $(MSBuildThisFileDirectory)obj\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\$(ProjectName)\ $(IntDir.Replace(`\\`, `\`)) - - $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\pythoncore\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\ + $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration) $(ProjectName) $(TargetName)$(PyDebugExt) false @@ -49,11 +48,12 @@ <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64'">_WIN64; <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64' and $(PlatformToolset) != 'ClangCL'">_M_X64;$(_PlatformPreprocessorDefinition) <_Py3NamePreprocessorDefinition>PY3_DLLNAME=L"$(Py3DllName)$(PyDebugExt)"; + <_FreeThreadedPreprocessorDefinition Condition="$(DisableGil) == 'true'">Py_GIL_DISABLED=1; - $(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(GeneratedPyConfigDir);$(PySourcePath)PC;%(AdditionalIncludeDirectories) - WIN32;$(_Py3NamePreprocessorDefinition);$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)%(PreprocessorDefinitions) + $(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(PySourcePath)PC;%(AdditionalIncludeDirectories) + WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)%(PreprocessorDefinitions) _Py_USING_PGO=1;%(PreprocessorDefinitions) MaxSpeed diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 549d6284972afc..32a8f2dbad3d5e 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -102,6 +102,7 @@ /Zm200 %(AdditionalOptions) $(PySourcePath)Modules\_hacl;$(PySourcePath)Modules\_hacl\include;$(PySourcePath)Python;%(AdditionalIncludeDirectories) $(zlibNgDir);$(GeneratedZlibNgDir);%(AdditionalIncludeDirectories) + $(GeneratedJitStencilsDir);%(AdditionalIncludeDirectories) _USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";ZLIB_COMPAT;%(PreprocessorDefinitions) _Py_HAVE_ZLIB;%(PreprocessorDefinitions) _Py_JIT;%(PreprocessorDefinitions) @@ -409,7 +410,7 @@ - + @@ -688,34 +689,6 @@ - - - - - - - - - @(PyConfigH->'%(FullPath)', ';') - $([System.IO.File]::ReadAllText($(PyConfigH))) - $([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h')) - - - $(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1')) - - - - - - - - - - - git diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index 3ad17737807235..21de614e71ddce 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -29,12 +29,12 @@ <_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" /> <_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" /> - <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/> + <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(PySourcePath)PC\pyconfig.h;$(PySourcePath)Tools\jit\**"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils.h"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> <_CasesSources Include="$(PySourcePath)Python\bytecodes.c;$(PySourcePath)Python\optimizer_bytecodes.c;"/> <_CasesOutputs Include="$(PySourcePath)Python\generated_cases.c.h;$(PySourcePath)Include\opcode_ids.h;$(PySourcePath)Include\internal\pycore_uop_ids.h;$(PySourcePath)Python\opcode_targets.h;$(PySourcePath)Include\internal\pycore_opcode_metadata.h;$(PySourcePath)Include\internal\pycore_uop_metadata.h;$(PySourcePath)Python\optimizer_cases.c.h;$(PySourcePath)Lib\_opcode_metadata.py"/> <_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" /> @@ -116,7 +116,7 @@ @@ -126,7 +126,7 @@ $(JITArgs) --debug + WorkingDirectory="$(GeneratedJitStencilsDir)"/> diff --git a/Tools/msi/dev/dev_files.wxs b/Tools/msi/dev/dev_files.wxs index 4357dc86d9d356..21f9c848cc6be5 100644 --- a/Tools/msi/dev/dev_files.wxs +++ b/Tools/msi/dev/dev_files.wxs @@ -3,7 +3,7 @@ - + diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 41338c29bdd9eb..be289c352de585 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -108,6 +108,8 @@ def compile_c_extension( extra_compile_args.append("-DPy_BUILD_CORE_MODULE") # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c extra_compile_args.append("-D_Py_TEST_PEGEN") + if sys.platform == "win32" and sysconfig.get_config_var("Py_GIL_DISABLED"): + extra_compile_args.append("-DPy_GIL_DISABLED") extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") if keep_asserts: extra_compile_args.append("-UNDEBUG") From 605022aeb69ae19cae1c020a6993ab5c433ce907 Mon Sep 17 00:00:00 2001 From: ggqlq <124190229+ggqlq@users.noreply.github.com> Date: Mon, 19 May 2025 20:15:04 +0800 Subject: [PATCH 195/989] gh-131178: Add tests for `http.server` command-line interface (#132540) --- Lib/http/server.py | 8 +- Lib/test/test_httpservers.py | 254 +++++++++++++++++++++++++++++++++++ 2 files changed, 260 insertions(+), 2 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index abf9f87a1fc711..f6d1b998f4201e 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -1000,7 +1000,7 @@ def test(HandlerClass=BaseHTTPRequestHandler, sys.exit(0) -if __name__ == '__main__': +def _main(args=None): import argparse import contextlib @@ -1024,7 +1024,7 @@ def test(HandlerClass=BaseHTTPRequestHandler, parser.add_argument('port', default=8000, type=int, nargs='?', help='bind to this port ' '(default: %(default)s)') - args = parser.parse_args() + args = parser.parse_args(args) if not args.tls_cert and args.tls_key: parser.error("--tls-key requires --tls-cert to be set") @@ -1064,3 +1064,7 @@ def finish_request(self, request, client_address): tls_key=args.tls_key, tls_password=tls_key_password, ) + + +if __name__ == '__main__': + _main() diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index df5d2a7bedc4b2..429d9005bd7605 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -8,6 +8,7 @@ SimpleHTTPRequestHandler from http import server, HTTPStatus +import contextlib import os import socket import sys @@ -20,6 +21,7 @@ import html import http, http.client import urllib.parse +import urllib.request import tempfile import time import datetime @@ -32,6 +34,8 @@ from test.support import ( is_apple, import_helper, os_helper, threading_helper ) +from test.support.script_helper import kill_python, spawn_python +from test.support.socket_helper import find_unused_port try: import ssl @@ -1281,6 +1285,256 @@ def test_server_test_ipv4(self, _): self.assertEqual(mock_server.address_family, socket.AF_INET) +class CommandLineTestCase(unittest.TestCase): + default_port = 8000 + default_bind = None + default_protocol = 'HTTP/1.0' + default_handler = SimpleHTTPRequestHandler + default_server = unittest.mock.ANY + tls_cert = certdata_file('ssl_cert.pem') + tls_key = certdata_file('ssl_key.pem') + tls_password = 'somepass' + tls_cert_options = ['--tls-cert'] + tls_key_options = ['--tls-key'] + tls_password_options = ['--tls-password-file'] + args = { + 'HandlerClass': default_handler, + 'ServerClass': default_server, + 'protocol': default_protocol, + 'port': default_port, + 'bind': default_bind, + 'tls_cert': None, + 'tls_key': None, + 'tls_password': None, + } + + def setUp(self): + super().setUp() + self.tls_password_file = tempfile.mktemp() + with open(self.tls_password_file, 'wb') as f: + f.write(self.tls_password.encode()) + self.addCleanup(os_helper.unlink, self.tls_password_file) + + def invoke_httpd(self, *args, stdout=None, stderr=None): + stdout = StringIO() if stdout is None else stdout + stderr = StringIO() if stderr is None else stderr + with contextlib.redirect_stdout(stdout), \ + contextlib.redirect_stderr(stderr): + server._main(args) + return stdout.getvalue(), stderr.getvalue() + + @mock.patch('http.server.test') + def test_port_flag(self, mock_func): + ports = [8000, 65535] + for port in ports: + with self.subTest(port=port): + self.invoke_httpd(str(port)) + call_args = self.args | dict(port=port) + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + + @mock.patch('http.server.test') + def test_directory_flag(self, mock_func): + options = ['-d', '--directory'] + directories = ['.', '/foo', '\\bar', '/', + 'C:\\', 'C:\\foo', 'C:\\bar', + '/home/user', './foo/foo2', 'D:\\foo\\bar'] + for flag in options: + for directory in directories: + with self.subTest(flag=flag, directory=directory): + self.invoke_httpd(flag, directory) + mock_func.assert_called_once_with(**self.args) + mock_func.reset_mock() + + @mock.patch('http.server.test') + def test_bind_flag(self, mock_func): + options = ['-b', '--bind'] + bind_addresses = ['localhost', '127.0.0.1', '::1', + '0.0.0.0', '8.8.8.8'] + for flag in options: + for bind_address in bind_addresses: + with self.subTest(flag=flag, bind_address=bind_address): + self.invoke_httpd(flag, bind_address) + call_args = self.args | dict(bind=bind_address) + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + + @mock.patch('http.server.test') + def test_protocol_flag(self, mock_func): + options = ['-p', '--protocol'] + protocols = ['HTTP/1.0', 'HTTP/1.1', 'HTTP/2.0', 'HTTP/3.0'] + for flag in options: + for protocol in protocols: + with self.subTest(flag=flag, protocol=protocol): + self.invoke_httpd(flag, protocol) + call_args = self.args | dict(protocol=protocol) + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + + @unittest.skipIf(ssl is None, "requires ssl") + @mock.patch('http.server.test') + def test_tls_cert_and_key_flags(self, mock_func): + for tls_cert_option in self.tls_cert_options: + for tls_key_option in self.tls_key_options: + self.invoke_httpd(tls_cert_option, self.tls_cert, + tls_key_option, self.tls_key) + call_args = self.args | { + 'tls_cert': self.tls_cert, + 'tls_key': self.tls_key, + } + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + + @unittest.skipIf(ssl is None, "requires ssl") + @mock.patch('http.server.test') + def test_tls_cert_and_key_and_password_flags(self, mock_func): + for tls_cert_option in self.tls_cert_options: + for tls_key_option in self.tls_key_options: + for tls_password_option in self.tls_password_options: + self.invoke_httpd(tls_cert_option, + self.tls_cert, + tls_key_option, + self.tls_key, + tls_password_option, + self.tls_password_file) + call_args = self.args | { + 'tls_cert': self.tls_cert, + 'tls_key': self.tls_key, + 'tls_password': self.tls_password, + } + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + + @unittest.skipIf(ssl is None, "requires ssl") + @mock.patch('http.server.test') + def test_missing_tls_cert_flag(self, mock_func): + for tls_key_option in self.tls_key_options: + with self.assertRaises(SystemExit): + self.invoke_httpd(tls_key_option, self.tls_key) + mock_func.reset_mock() + + for tls_password_option in self.tls_password_options: + with self.assertRaises(SystemExit): + self.invoke_httpd(tls_password_option, self.tls_password) + mock_func.reset_mock() + + @unittest.skipIf(ssl is None, "requires ssl") + @mock.patch('http.server.test') + def test_invalid_password_file(self, mock_func): + non_existent_file = 'non_existent_file' + for tls_password_option in self.tls_password_options: + for tls_cert_option in self.tls_cert_options: + with self.assertRaises(SystemExit): + self.invoke_httpd(tls_cert_option, + self.tls_cert, + tls_password_option, + non_existent_file) + + @mock.patch('http.server.test') + def test_no_arguments(self, mock_func): + self.invoke_httpd() + mock_func.assert_called_once_with(**self.args) + mock_func.reset_mock() + + @mock.patch('http.server.test') + def test_help_flag(self, _): + options = ['-h', '--help'] + for option in options: + stdout, stderr = StringIO(), StringIO() + with self.assertRaises(SystemExit): + self.invoke_httpd(option, stdout=stdout, stderr=stderr) + self.assertIn('usage', stdout.getvalue()) + self.assertEqual(stderr.getvalue(), '') + + @mock.patch('http.server.test') + def test_unknown_flag(self, _): + stdout, stderr = StringIO(), StringIO() + with self.assertRaises(SystemExit): + self.invoke_httpd('--unknown-flag', stdout=stdout, stderr=stderr) + self.assertEqual(stdout.getvalue(), '') + self.assertIn('error', stderr.getvalue()) + + +class CommandLineRunTimeTestCase(unittest.TestCase): + served_data = os.urandom(32) + served_file_name = 'served_filename' + tls_cert = certdata_file('ssl_cert.pem') + tls_key = certdata_file('ssl_key.pem') + tls_password = 'somepass' + + def setUp(self): + super().setUp() + with open(self.served_file_name, 'wb') as f: + f.write(self.served_data) + self.addCleanup(os_helper.unlink, self.served_file_name) + self.tls_password_file = tempfile.mktemp() + with open(self.tls_password_file, 'wb') as f: + f.write(self.tls_password.encode()) + self.addCleanup(os_helper.unlink, self.tls_password_file) + + def fetch_file(self, path): + context = ssl.create_default_context() + # allow self-signed certificates + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + req = urllib.request.Request(path, method='GET') + with urllib.request.urlopen(req, context=context) as res: + return res.read() + + def parse_cli_output(self, output): + matches = re.search(r'\((https?)://([^/:]+):(\d+)/?\)', output) + if matches is None: + return None, None, None + return matches.group(1), matches.group(2), int(matches.group(3)) + + def wait_for_server(self, proc, protocol, port, bind, timeout=50): + """Check the server process output. + + Return True if the server was successfully started + and is listening on the given port and bind address. + """ + while timeout > 0: + line = proc.stdout.readline() + if not line: + time.sleep(0.1) + timeout -= 1 + continue + protocol_, host_, port_ = self.parse_cli_output(line) + if not protocol_ or not host_ or not port_: + time.sleep(0.1) + timeout -= 1 + continue + if protocol_ == protocol and host_ == bind and port_ == port: + return True + break + return False + + def test_http_client(self): + port = find_unused_port() + bind = '127.0.0.1' + proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, + bufsize=1, text=True) + self.addCleanup(kill_python, proc) + self.addCleanup(proc.terminate) + self.assertTrue(self.wait_for_server(proc, 'http', port, bind)) + res = self.fetch_file(f'http://{bind}:{port}/{self.served_file_name}') + self.assertEqual(res, self.served_data) + + def test_https_client(self): + port = find_unused_port() + bind = '127.0.0.1' + proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, + '--tls-cert', self.tls_cert, + '--tls-key', self.tls_key, + '--tls-password-file', self.tls_password_file, + bufsize=1, text=True) + self.addCleanup(kill_python, proc) + self.addCleanup(proc.terminate) + self.assertTrue(self.wait_for_server(proc, 'https', port, bind)) + res = self.fetch_file(f'https://{bind}:{port}/{self.served_file_name}') + self.assertEqual(res, self.served_data) + + def setUpModule(): unittest.addModuleCleanup(os.chdir, os.getcwd()) From 44b73d3cd4466e148460883acf4494124eae8c91 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 19 May 2025 16:27:50 +0300 Subject: [PATCH 196/989] gh-122055: Clarify documentation for empty matches in RE (GH-133169) --- Doc/library/re.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index eb3b1e5549cc98..75ebbf11c8e47c 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -991,8 +991,8 @@ Functions That way, separator components are always found at the same relative indices within the result list. - Empty matches for the pattern split the string only when not adjacent - to a previous empty match. + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. .. code:: pycon @@ -1095,9 +1095,12 @@ Functions The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all - occurrences will be replaced. Empty matches for the pattern are replaced only - when not adjacent to a previous empty match, so ``sub('x*', '-', 'abxd')`` returns - ``'-a-b--d-'``. + occurrences will be replaced. + + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. + As a result, ``sub('x*', '-', 'abxd')`` returns ``'-a-b--d-'`` + instead of ``'-a-b-d-'``. .. index:: single: \g; in regular expressions @@ -1128,8 +1131,7 @@ Functions .. versionchanged:: 3.7 Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter now are errors. - Empty matches for the pattern are replaced when adjacent to a previous - non-empty match. + An empty match can occur immediately after a non-empty match. .. versionchanged:: 3.12 Group *id* can only contain ASCII digits. From c31547a5914db93b8b38c6a5261ef716255f3582 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 19 May 2025 14:30:43 +0100 Subject: [PATCH 197/989] gh-134097: Print number of refs & blocks after each statement in new REPL (gh-134136) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/_pyrepl/simple_interact.py | 9 +++++++++ Lib/test/test_pyrepl/test_pyrepl.py | 15 ++++++++++++++- ...2025-05-17-13-46-20.gh-issue-134097.fgkjE1.rst | 1 + 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-17-13-46-20.gh-issue-134097.fgkjE1.rst diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index b3848833e14208..c23894a34b8b53 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -110,6 +110,10 @@ def run_multiline_interactive_console( more_lines = functools.partial(_more_lines, console) input_n = 0 + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + def maybe_run_command(statement: str) -> bool: statement = statement.strip() if statement in console.locals or statement not in REPL_COMMANDS: @@ -167,3 +171,8 @@ def maybe_run_command(statement: str) -> bool: except: console.showtraceback() console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index fc8114891d12dd..f80755b3368f8f 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -10,7 +10,7 @@ import tempfile from unittest import TestCase, skipUnless, skipIf from unittest.mock import patch -from test.support import force_not_colorized, make_clean_env +from test.support import force_not_colorized, make_clean_env, Py_DEBUG from test.support import SHORT_TIMEOUT, STDLIB_DIR from test.support.import_helper import import_module from test.support.os_helper import EnvironmentVarGuard, unlink @@ -1610,3 +1610,16 @@ def test_prompt_after_help(self): # Extra stuff (newline and `exit` rewrites) are necessary # because of how run_repl works. self.assertNotIn(">>> \n>>> >>>", cleaned_output) + + @skipUnless(Py_DEBUG, '-X showrefcount requires a Python debug build') + def test_showrefcount(self): + env = os.environ.copy() + env.pop("PYTHON_BASIC_REPL", "") + output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) + matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) + self.assertEqual(len(matches), 3) + + env["PYTHON_BASIC_REPL"] = "1" + output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) + matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) + self.assertEqual(len(matches), 3) diff --git a/Misc/NEWS.d/next/Library/2025-05-17-13-46-20.gh-issue-134097.fgkjE1.rst b/Misc/NEWS.d/next/Library/2025-05-17-13-46-20.gh-issue-134097.fgkjE1.rst new file mode 100644 index 00000000000000..0b388d9db380f5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-17-13-46-20.gh-issue-134097.fgkjE1.rst @@ -0,0 +1 @@ +Fix interaction of the new :term:`REPL` and :option:`-X showrefcount <-X>` command line option. From ee36db550076e5a9185444ffbc53eaf8157ef04c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 19 May 2025 15:53:39 +0200 Subject: [PATCH 198/989] gh-125843: indicate which C function caused a `curses.error` (#125844) - Rename error helpers with a `curses_set_error_*` prefix instead of `PyCurses*`. - Cleanly report both NULL and ERR cases. - Raise `curses.error` in `is_linetouched` instead of a `TypeError`. --- Include/py_curses.h | 7 + Lib/test/test_curses.py | 3 + ...-10-22-16-21-55.gh-issue-125843.2ttzYo.rst | 2 + Modules/_cursesmodule.c | 535 ++++++++++++------ 4 files changed, 367 insertions(+), 180 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-22-16-21-55.gh-issue-125843.2ttzYo.rst diff --git a/Include/py_curses.h b/Include/py_curses.h index 49fc3c9d127aa6..0948aabedd4993 100644 --- a/Include/py_curses.h +++ b/Include/py_curses.h @@ -109,6 +109,13 @@ static void **PyCurses_API; static const char catchall_ERR[] = "curses function returned ERR"; static const char catchall_NULL[] = "curses function returned NULL"; +#if defined(CURSES_MODULE) || defined(CURSES_PANEL_MODULE) +/* Error messages shared by the curses package */ +# define CURSES_ERROR_FORMAT "%s() returned %s" +# define CURSES_ERROR_VERBOSE_FORMAT "%s() (called by %s()) returned %s" +# define CURSES_ERROR_MUST_CALL_FORMAT "must call %s() first" +#endif + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py index c307258e5657e2..feca82681deb7c 100644 --- a/Lib/test/test_curses.py +++ b/Lib/test/test_curses.py @@ -130,6 +130,9 @@ def test_use_env(self): curses.use_env(False) curses.use_env(True) + def test_error(self): + self.assertIsSubclass(curses.error, Exception) + def test_create_windows(self): win = curses.newwin(5, 10) self.assertEqual(win.getbegyx(), (0, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-22-16-21-55.gh-issue-125843.2ttzYo.rst b/Misc/NEWS.d/next/Library/2024-10-22-16-21-55.gh-issue-125843.2ttzYo.rst new file mode 100644 index 00000000000000..ec8f3a750065ff --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-22-16-21-55.gh-issue-125843.2ttzYo.rst @@ -0,0 +1,2 @@ +If possible, indicate which :mod:`curses` C function or macro is responsible +for raising a :exc:`curses.error` exception. Patch by Bénédikt Tran. diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index cd185bc2b02ea5..ab63fdbe45de61 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -211,18 +211,116 @@ static int curses_start_color_called = FALSE; static const char *curses_screen_encoding = NULL; +/* Utility Error Procedures */ + +static void +_curses_format_error(cursesmodule_state *state, + const char *curses_funcname, + const char *python_funcname, + const char *return_value, + const char *default_message) +{ + assert(!PyErr_Occurred()); + if (python_funcname == NULL && curses_funcname == NULL) { + PyErr_SetString(state->error, default_message); + } + else if (python_funcname == NULL) { + (void)PyErr_Format(state->error, CURSES_ERROR_FORMAT, + curses_funcname, return_value); + } + else { + assert(python_funcname != NULL); + (void)PyErr_Format(state->error, CURSES_ERROR_VERBOSE_FORMAT, + curses_funcname, python_funcname, return_value); + } +} + +/* + * Format a curses error for a function that returned ERR. + * + * Specify a non-NULL 'python_funcname' when the latter differs from + * 'curses_funcname'. If both names are NULL, uses the 'catchall_ERR' + * message instead. + */ +static void +_curses_set_error(cursesmodule_state *state, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_format_error(state, curses_funcname, python_funcname, + "ERR", catchall_ERR); +} + +/* + * Format a curses error for a function that returned NULL. + * + * Specify a non-NULL 'python_funcname' when the latter differs from + * 'curses_funcname'. If both names are NULL, uses the 'catchall_NULL' + * message instead. + */ +static inline void +_curses_set_null_error(cursesmodule_state *state, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_format_error(state, curses_funcname, python_funcname, + "NULL", catchall_NULL); +} + +/* Same as _curses_set_error() for a module object. */ +static void +curses_set_error(PyObject *module, + const char *curses_funcname, + const char *python_funcname) +{ + cursesmodule_state *state = get_cursesmodule_state(module); + _curses_set_error(state, curses_funcname, python_funcname); +} + +/* Same as _curses_set_null_error() for a module object. */ +static void +curses_set_null_error(PyObject *module, + const char *curses_funcname, + const char *python_funcname) +{ + cursesmodule_state *state = get_cursesmodule_state(module); + _curses_set_null_error(state, curses_funcname, python_funcname); +} + +/* Same as _curses_set_error() for a Window object. */ +static void +curses_window_set_error(PyCursesWindowObject *win, + const char *curses_funcname, + const char *python_funcname) +{ + cursesmodule_state *state = get_cursesmodule_state_by_win(win); + _curses_set_error(state, curses_funcname, python_funcname); +} + +/* Same as _curses_set_null_error() for a Window object. */ +static void +curses_window_set_null_error(PyCursesWindowObject *win, + const char *curses_funcname, + const char *python_funcname) +{ + cursesmodule_state *state = get_cursesmodule_state_by_win(win); + _curses_set_null_error(state, curses_funcname, python_funcname); +} + /* Utility Checking Procedures */ /* * Function to check that 'funcname' has been called by testing - * the 'called' boolean. If an error occurs, a PyCursesError is + * the 'called' boolean. If an error occurs, an exception is * set and this returns 0. Otherwise, this returns 1. * * Since this function can be called in functions that do not * have a direct access to the module's state, '_curses.error' * is imported on demand. + * + * Use _PyCursesStatefulCheckFunction() if the module is given. */ -static inline int +static int _PyCursesCheckFunction(int called, const char *funcname) { if (called == TRUE) { @@ -230,7 +328,7 @@ _PyCursesCheckFunction(int called, const char *funcname) } PyObject *exc = PyImport_ImportModuleAttrString("_curses", "error"); if (exc != NULL) { - PyErr_Format(exc, "must call %s() first", funcname); + PyErr_Format(exc, CURSES_ERROR_MUST_CALL_FORMAT, funcname); Py_DECREF(exc); } assert(PyErr_Occurred()); @@ -244,14 +342,15 @@ _PyCursesCheckFunction(int called, const char *funcname) * * The exception type is obtained from the 'module' state. */ -static inline int -_PyCursesStatefulCheckFunction(PyObject *module, int called, const char *funcname) +static int +_PyCursesStatefulCheckFunction(PyObject *module, + int called, const char *funcname) { if (called == TRUE) { return 1; } cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_Format(state->error, "must call %s() first", funcname); + PyErr_Format(state->error, CURSES_ERROR_MUST_CALL_FORMAT, funcname); return 0; } @@ -287,44 +386,41 @@ _PyCursesStatefulCheckFunction(PyObject *module, int called, const char *funcnam /* Utility Functions */ -static inline void -_PyCursesSetError(cursesmodule_state *state, const char *funcname) -{ - if (funcname == NULL) { - PyErr_SetString(state->error, catchall_ERR); - } - else { - PyErr_Format(state->error, "%s() returned ERR", funcname); - } -} - /* - * Check the return code from a curses function and return None - * or raise an exception as appropriate. + * Check the return code from a curses function, returning None + * on success and setting an exception on error. */ +/* + * Return None if 'code' is OK. Otherwise, set an exception + * using curses_set_error() and the remaining arguments, and + * return NULL. + */ static PyObject * -PyCursesCheckERR(PyObject *module, int code, const char *fname) +curses_check_err(PyObject *module, int code, + const char *curses_funcname, + const char *python_funcname) { if (code != ERR) { + assert(code == OK); Py_RETURN_NONE; - } else { - cursesmodule_state *state = get_cursesmodule_state(module); - _PyCursesSetError(state, fname); - return NULL; } + curses_set_error(module, curses_funcname, python_funcname); + return NULL; } +/* Same as curses_check_err() for a Window object. */ static PyObject * -PyCursesCheckERR_ForWin(PyCursesWindowObject *win, int code, const char *fname) +curses_window_check_err(PyCursesWindowObject *win, int code, + const char *curses_funcname, + const char *python_funcname) { if (code != ERR) { + assert(code == OK); Py_RETURN_NONE; - } else { - cursesmodule_state *state = get_cursesmodule_state_by_win(win); - _PyCursesSetError(state, fname); - return NULL; } + curses_window_set_error(win, curses_funcname, python_funcname); + return NULL; } /* Convert an object to a byte (an integer of type chtype): @@ -650,13 +746,16 @@ class component_converter(CConverter): The Window Object ******************************************************************************/ -/* Function prototype macros for Window object - - X - function name - TYPE - parameter Type - ERGSTR - format string for construction of the return value - PARSESTR - format string for argument parsing -*/ +/* + * Macros for creating a PyCursesWindowObject object's method. + * + * Parameters + * + * X The name of the curses C function or macro to invoke. + * TYPE The function parameter(s) type. + * ERGSTR The format string for construction of the return value. + * PARSESTR The format string for argument parsing. + */ #define Window_NoArgNoReturnFunction(X) \ static PyObject *PyCursesWindow_ ## X \ @@ -664,7 +763,7 @@ class component_converter(CConverter): { \ PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); \ int code = X(self->win); \ - return PyCursesCheckERR_ForWin(self, code, # X); \ + return curses_window_check_err(self, code, # X, NULL); \ } #define Window_NoArgTrueFalseFunction(X) \ @@ -717,7 +816,7 @@ class component_converter(CConverter): } \ PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); \ int code = X(self->win, arg1); \ - return PyCursesCheckERR_ForWin(self, code, # X); \ + return curses_window_check_err(self, code, # X, NULL); \ } #define Window_TwoArgNoReturnFunction(X, TYPE, PARSESTR) \ @@ -730,7 +829,7 @@ class component_converter(CConverter): } \ PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); \ int code = X(self->win, arg1, arg2); \ - return PyCursesCheckERR_ForWin(self, code, # X); \ + return curses_window_check_err(self, code, # X, NULL); \ } /* ------------- WINDOW routines --------------- */ @@ -903,13 +1002,15 @@ _curses_window_addch_impl(PyCursesWindowObject *self, int group_left_1, #ifdef HAVE_NCURSESW type = PyCurses_ConvertToCchar_t(self, ch, &cch, wstr); if (type == 2) { - funcname = "add_wch"; wstr[1] = L'\0'; setcchar(&wcval, wstr, attr, PAIR_NUMBER(attr), NULL); - if (coordinates_group) + if (coordinates_group) { rtn = mvwadd_wch(self->win,y,x, &wcval); + funcname = "mvwadd_wch"; + } else { rtn = wadd_wch(self->win, &wcval); + funcname = "wadd_wch"; } } else @@ -917,17 +1018,19 @@ _curses_window_addch_impl(PyCursesWindowObject *self, int group_left_1, type = PyCurses_ConvertToCchar_t(self, ch, &cch); #endif if (type == 1) { - funcname = "addch"; - if (coordinates_group) + if (coordinates_group) { rtn = mvwaddch(self->win,y,x, cch | (attr_t) attr); + funcname = "mvwaddch"; + } else { rtn = waddch(self->win, cch | (attr_t) attr); + funcname = "waddch"; } } else { return NULL; } - return PyCursesCheckERR_ForWin(self, rtn, funcname); + return curses_window_check_err(self, rtn, funcname, "addch"); } /*[clinic input] @@ -987,27 +1090,33 @@ _curses_window_addstr_impl(PyCursesWindowObject *self, int group_left_1, } #ifdef HAVE_NCURSESW if (strtype == 2) { - funcname = "addwstr"; - if (use_xy) + if (use_xy) { rtn = mvwaddwstr(self->win,y,x,wstr); - else + funcname = "mvwaddwstr"; + } + else { rtn = waddwstr(self->win,wstr); + funcname = "waddwstr"; + } PyMem_Free(wstr); } else #endif { const char *str = PyBytes_AS_STRING(bytesobj); - funcname = "addstr"; - if (use_xy) + if (use_xy) { rtn = mvwaddstr(self->win,y,x,str); - else + funcname = "mvwaddstr"; + } + else { rtn = waddstr(self->win,str); + funcname = "waddstr"; + } Py_DECREF(bytesobj); } if (use_attr) (void)wattrset(self->win,attr_old); - return PyCursesCheckERR_ForWin(self, rtn, funcname); + return curses_window_check_err(self, rtn, funcname, "addstr"); } /*[clinic input] @@ -1070,27 +1179,33 @@ _curses_window_addnstr_impl(PyCursesWindowObject *self, int group_left_1, } #ifdef HAVE_NCURSESW if (strtype == 2) { - funcname = "addnwstr"; - if (use_xy) + if (use_xy) { rtn = mvwaddnwstr(self->win,y,x,wstr,n); - else + funcname = "mvwaddnwstr"; + } + else { rtn = waddnwstr(self->win,wstr,n); + funcname = "waddnwstr"; + } PyMem_Free(wstr); } else #endif { const char *str = PyBytes_AS_STRING(bytesobj); - funcname = "addnstr"; - if (use_xy) + if (use_xy) { rtn = mvwaddnstr(self->win,y,x,str,n); - else + funcname = "mvwaddnstr"; + } + else { rtn = waddnstr(self->win,str,n); + funcname = "waddnstr"; + } Py_DECREF(bytesobj); } if (use_attr) (void)wattrset(self->win,attr_old); - return PyCursesCheckERR_ForWin(self, rtn, funcname); + return curses_window_check_err(self, rtn, funcname, "addnstr"); } /*[clinic input] @@ -1114,7 +1229,8 @@ _curses_window_bkgd_impl(PyCursesWindowObject *self, PyObject *ch, long attr) if (!PyCurses_ConvertToChtype(self, ch, &bkgd)) return NULL; - return PyCursesCheckERR_ForWin(self, wbkgd(self->win, bkgd | attr), "bkgd"); + int rtn = wbkgd(self->win, bkgd | attr); + return curses_window_check_err(self, rtn, "wbkgd", "bkgd"); } /*[clinic input] @@ -1130,7 +1246,8 @@ static PyObject * _curses_window_attroff_impl(PyCursesWindowObject *self, long attr) /*[clinic end generated code: output=8a2fcd4df682fc64 input=786beedf06a7befe]*/ { - return PyCursesCheckERR_ForWin(self, wattroff(self->win, (attr_t)attr), "attroff"); + int rtn = wattroff(self->win, (attr_t)attr); + return curses_window_check_err(self, rtn, "wattroff", "attroff"); } /*[clinic input] @@ -1146,7 +1263,8 @@ static PyObject * _curses_window_attron_impl(PyCursesWindowObject *self, long attr) /*[clinic end generated code: output=7afea43b237fa870 input=5a88fba7b1524f32]*/ { - return PyCursesCheckERR_ForWin(self, wattron(self->win, (attr_t)attr), "attron"); + int rtn = wattron(self->win, (attr_t)attr); + return curses_window_check_err(self, rtn, "wattron", "attron"); } /*[clinic input] @@ -1162,7 +1280,8 @@ static PyObject * _curses_window_attrset_impl(PyCursesWindowObject *self, long attr) /*[clinic end generated code: output=84e379bff20c0433 input=42e400c0d0154ab5]*/ { - return PyCursesCheckERR_ForWin(self, wattrset(self->win, (attr_t)attr), "attrset"); + int rtn = wattrset(self->win, (attr_t)attr); + return curses_window_check_err(self, rtn, "wattrset", "attrset"); } /*[clinic input] @@ -1188,7 +1307,7 @@ _curses_window_bkgdset_impl(PyCursesWindowObject *self, PyObject *ch, return NULL; wbkgdset(self->win, bkgd | attr); - return PyCursesCheckERR_ForWin(self, 0, "bkgdset"); + Py_RETURN_NONE; } /*[clinic input] @@ -1344,6 +1463,7 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); int rtn; + const char *funcname; int x, y; int num = -1; short color; @@ -1385,12 +1505,14 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) if (use_xy) { rtn = mvwchgat(self->win,y,x,num,attr,color,NULL); touchline(self->win,y,1); + funcname = "mvwchgat"; } else { getyx(self->win,y,x); rtn = wchgat(self->win,num,attr,color,NULL); touchline(self->win,y,1); + funcname = "wchgat"; } - return PyCursesCheckERR_ForWin(self, rtn, "chgat"); + return curses_window_check_err(self, rtn, funcname, "chgat"); } #endif @@ -1413,12 +1535,17 @@ _curses_window_delch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x) /*[clinic end generated code: output=22e77bb9fa11b461 input=d2f79e630a4fc6d0]*/ { + int rtn; + const char *funcname; if (!group_right_1) { - return PyCursesCheckERR_ForWin(self, wdelch(self->win), "wdelch"); + rtn = wdelch(self->win); + funcname = "wdelch"; } else { - return PyCursesCheckERR_ForWin(self, py_mvwdelch(self->win, y, x), "mvwdelch"); + rtn = py_mvwdelch(self->win, y, x); + funcname = "mvwdelch"; } + return curses_window_check_err(self, rtn, funcname, "delch"); } /*[clinic input] @@ -1453,8 +1580,7 @@ _curses_window_derwin_impl(PyCursesWindowObject *self, int group_left_1, win = derwin(self->win,nlines,ncols,begin_y,begin_x); if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, catchall_NULL); + curses_window_set_error(self, "derwin", NULL); return NULL; } @@ -1485,17 +1611,20 @@ _curses_window_echochar_impl(PyCursesWindowObject *self, PyObject *ch, if (!PyCurses_ConvertToChtype(self, ch, &ch_)) return NULL; + int rtn; + const char *funcname; #ifdef py_is_pad if (py_is_pad(self->win)) { - return PyCursesCheckERR_ForWin(self, - pechochar(self->win, ch_ | (attr_t)attr), - "echochar"); + rtn = pechochar(self->win, ch_ | (attr_t)attr); + funcname = "pechochar"; } else #endif - return PyCursesCheckERR_ForWin(self, - wechochar(self->win, ch_ | (attr_t)attr), - "echochar"); + { + rtn = wechochar(self->win, ch_ | (attr_t)attr); + funcname = "wechochar"; + } + return curses_window_check_err(self, rtn, funcname, "echochar"); } #ifdef NCURSES_MOUSE_VERSION @@ -1608,7 +1737,8 @@ _curses_window_getkey_impl(PyCursesWindowObject *self, int group_right_1, PyErr_CheckSignals(); if (!PyErr_Occurred()) { cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, "no input"); + const char *funcname = group_right_1 ? "mvwgetch" : "wgetch"; + PyErr_Format(state->error, "getkey(): %s(): no input", funcname); } return NULL; } else if (rtn <= 255) { @@ -1668,7 +1798,8 @@ _curses_window_get_wch_impl(PyCursesWindowObject *self, int group_right_1, /* get_wch() returns ERR in nodelay mode */ cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, "no input"); + const char *funcname = group_right_1 ? "mvwget_wch" : "wget_wch"; + PyErr_Format(state->error, "get_wch(): %s(): no input", funcname); return NULL; } if (ct == KEY_CODE_YES) @@ -1794,10 +1925,12 @@ _curses_window_hline_impl(PyCursesWindowObject *self, int group_left_1, return NULL; if (group_left_1) { if (wmove(self->win, y, x) == ERR) { - return PyCursesCheckERR_ForWin(self, ERR, "wmove"); + curses_window_set_error(self, "wmove", "hline"); + return NULL; } } - return PyCursesCheckERR_ForWin(self, whline(self->win, ch_ | (attr_t)attr, n), "hline"); + int rtn = whline(self->win, ch_ | (attr_t)attr, n); + return curses_window_check_err(self, rtn, "whline", "hline"); } /*[clinic input] @@ -1837,14 +1970,17 @@ _curses_window_insch_impl(PyCursesWindowObject *self, int group_left_1, if (!PyCurses_ConvertToChtype(self, ch, &ch_)) return NULL; + const char *funcname; if (!group_left_1) { rtn = winsch(self->win, ch_ | (attr_t)attr); + funcname = "winsch"; } else { rtn = mvwinsch(self->win, y, x, ch_ | (attr_t)attr); + funcname = "mvwwinsch"; } - return PyCursesCheckERR_ForWin(self, rtn, "insch"); + return curses_window_check_err(self, rtn, funcname, "insch"); } /*[clinic input] @@ -2003,27 +2139,33 @@ _curses_window_insstr_impl(PyCursesWindowObject *self, int group_left_1, } #ifdef HAVE_NCURSESW if (strtype == 2) { - funcname = "inswstr"; - if (use_xy) + if (use_xy) { rtn = mvwins_wstr(self->win,y,x,wstr); - else + funcname = "mvwins_wstr"; + } + else { rtn = wins_wstr(self->win,wstr); + funcname = "wins_wstr"; + } PyMem_Free(wstr); } else #endif { const char *str = PyBytes_AS_STRING(bytesobj); - funcname = "insstr"; - if (use_xy) + if (use_xy) { rtn = mvwinsstr(self->win,y,x,str); - else + funcname = "mvwinsstr"; + } + else { rtn = winsstr(self->win,str); + funcname = "winsstr"; + } Py_DECREF(bytesobj); } if (use_attr) (void)wattrset(self->win,attr_old); - return PyCursesCheckERR_ForWin(self, rtn, funcname); + return curses_window_check_err(self, rtn, funcname, "insstr"); } /*[clinic input] @@ -2088,27 +2230,33 @@ _curses_window_insnstr_impl(PyCursesWindowObject *self, int group_left_1, } #ifdef HAVE_NCURSESW if (strtype == 2) { - funcname = "insn_wstr"; - if (use_xy) + if (use_xy) { rtn = mvwins_nwstr(self->win,y,x,wstr,n); - else + funcname = "mvwins_nwstr"; + } + else { rtn = wins_nwstr(self->win,wstr,n); + funcname = "wins_nwstr"; + } PyMem_Free(wstr); } else #endif { const char *str = PyBytes_AS_STRING(bytesobj); - funcname = "insnstr"; - if (use_xy) + if (use_xy) { rtn = mvwinsnstr(self->win,y,x,str,n); - else + funcname = "mvwinsnstr"; + } + else { rtn = winsnstr(self->win,str,n); + funcname = "winsnstr"; + } Py_DECREF(bytesobj); } if (use_attr) (void)wattrset(self->win,attr_old); - return PyCursesCheckERR_ForWin(self, rtn, funcname); + return curses_window_check_err(self, rtn, funcname, "insnstr"); } /*[clinic input] @@ -2130,8 +2278,7 @@ _curses_window_is_linetouched_impl(PyCursesWindowObject *self, int line) int erg; erg = is_linetouched(self->win, line); if (erg == ERR) { - PyErr_SetString(PyExc_TypeError, - "is_linetouched: line number outside of boundaries"); + curses_window_set_error(self, "is_linetouched", NULL); return NULL; } return PyBool_FromLong(erg); @@ -2195,7 +2342,8 @@ _curses_window_noutrefresh_impl(PyCursesWindowObject *self) rtn = pnoutrefresh(self->win, pminrow, pmincol, sminrow, smincol, smaxrow, smaxcol); Py_END_ALLOW_THREADS - return PyCursesCheckERR_ForWin(self, rtn, "pnoutrefresh"); + return curses_window_check_err(self, rtn, + "pnoutrefresh", "noutrefresh"); } if (group_right_1) { PyErr_SetString(PyExc_TypeError, @@ -2206,7 +2354,7 @@ _curses_window_noutrefresh_impl(PyCursesWindowObject *self) Py_BEGIN_ALLOW_THREADS rtn = wnoutrefresh(self->win); Py_END_ALLOW_THREADS - return PyCursesCheckERR_ForWin(self, rtn, "wnoutrefresh"); + return curses_window_check_err(self, rtn, "wnoutrefresh", "noutrefresh"); } /*[clinic input] @@ -2248,11 +2396,11 @@ _curses_window_overlay_impl(PyCursesWindowObject *self, if (group_right_1) { rtn = copywin(self->win, destwin->win, sminrow, smincol, dminrow, dmincol, dmaxrow, dmaxcol, TRUE); - return PyCursesCheckERR_ForWin(self, rtn, "copywin"); + return curses_window_check_err(self, rtn, "copywin", "overlay"); } else { rtn = overlay(self->win, destwin->win); - return PyCursesCheckERR_ForWin(self, rtn, "overlay"); + return curses_window_check_err(self, rtn, "overlay", NULL); } } @@ -2296,11 +2444,11 @@ _curses_window_overwrite_impl(PyCursesWindowObject *self, if (group_right_1) { rtn = copywin(self->win, destwin->win, sminrow, smincol, dminrow, dmincol, dmaxrow, dmaxcol, FALSE); - return PyCursesCheckERR_ForWin(self, rtn, "copywin"); + return curses_window_check_err(self, rtn, "copywin", "overwrite"); } else { rtn = overwrite(self->win, destwin->win); - return PyCursesCheckERR_ForWin(self, rtn, "overwrite"); + return curses_window_check_err(self, rtn, "overwrite", NULL); } } @@ -2329,7 +2477,7 @@ _curses_window_putwin_impl(PyCursesWindowObject *self, PyObject *file) return PyErr_SetFromErrno(PyExc_OSError); if (_Py_set_inheritable(fileno(fp), 0, NULL) < 0) goto exit; - res = PyCursesCheckERR_ForWin(self, putwin(self->win, fp), "putwin"); + res = curses_window_check_err(self, putwin(self->win, fp), "putwin", NULL); if (res == NULL) goto exit; fseek(fp, 0, 0); @@ -2368,7 +2516,8 @@ static PyObject * _curses_window_redrawln_impl(PyCursesWindowObject *self, int beg, int num) /*[clinic end generated code: output=ea216e334f9ce1b4 input=152155e258a77a7a]*/ { - return PyCursesCheckERR_ForWin(self, wredrawln(self->win,beg,num), "redrawln"); + int rtn = wredrawln(self->win,beg, num); + return curses_window_check_err(self, rtn, "wredrawln", "redrawln"); } /*[clinic input] @@ -2419,7 +2568,7 @@ _curses_window_refresh_impl(PyCursesWindowObject *self, int group_right_1, rtn = prefresh(self->win, pminrow, pmincol, sminrow, smincol, smaxrow, smaxcol); Py_END_ALLOW_THREADS - return PyCursesCheckERR_ForWin(self, rtn, "prefresh"); + return curses_window_check_err(self, rtn, "prefresh", "refresh"); } #endif if (group_right_1) { @@ -2430,7 +2579,7 @@ _curses_window_refresh_impl(PyCursesWindowObject *self, int group_right_1, Py_BEGIN_ALLOW_THREADS rtn = wrefresh(self->win); Py_END_ALLOW_THREADS - return PyCursesCheckERR_ForWin(self, rtn, "prefresh"); + return curses_window_check_err(self, rtn, "wrefresh", "refresh"); } /*[clinic input] @@ -2452,7 +2601,8 @@ _curses_window_setscrreg_impl(PyCursesWindowObject *self, int top, int bottom) /*[clinic end generated code: output=486ab5db218d2b1a input=1b517b986838bf0e]*/ { - return PyCursesCheckERR_ForWin(self, wsetscrreg(self->win, top, bottom), "wsetscrreg"); + int rtn = wsetscrreg(self->win, top, bottom); + return curses_window_check_err(self, rtn, "wsetscrreg", "setscrreg"); } /*[clinic input] @@ -2482,19 +2632,23 @@ _curses_window_subwin_impl(PyCursesWindowObject *self, int group_left_1, /*[clinic end generated code: output=93e898afc348f59a input=2129fa47fd57721c]*/ { WINDOW *win; + const char *funcname; /* printf("Subwin: %i %i %i %i \n", nlines, ncols, begin_y, begin_x); */ #ifdef py_is_pad if (py_is_pad(self->win)) { win = subpad(self->win, nlines, ncols, begin_y, begin_x); + funcname = "subpad"; } else #endif + { win = subwin(self->win, nlines, ncols, begin_y, begin_x); + funcname = "subwin"; + } if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, catchall_NULL); + curses_window_set_null_error(self, funcname, "subwin"); return NULL; } @@ -2521,12 +2675,17 @@ _curses_window_scroll_impl(PyCursesWindowObject *self, int group_right_1, int lines) /*[clinic end generated code: output=4541a8a11852d360 input=c969ca0cfabbdbec]*/ { + int rtn; + const char *funcname; if (!group_right_1) { - return PyCursesCheckERR_ForWin(self, scroll(self->win), "scroll"); + rtn = scroll(self->win); + funcname = "scroll"; } else { - return PyCursesCheckERR_ForWin(self, wscrl(self->win, lines), "scroll"); + rtn = wscrl(self->win, lines); + funcname = "wscrl"; } + return curses_window_check_err(self, rtn, funcname, "scroll"); } /*[clinic input] @@ -2550,12 +2709,17 @@ _curses_window_touchline_impl(PyCursesWindowObject *self, int start, int count, int group_right_1, int changed) /*[clinic end generated code: output=65d05b3f7438c61d input=a98aa4f79b6be845]*/ { + int rtn; + const char *funcname; if (!group_right_1) { - return PyCursesCheckERR_ForWin(self, touchline(self->win, start, count), "touchline"); + rtn = touchline(self->win, start, count); + funcname = "touchline"; } else { - return PyCursesCheckERR_ForWin(self, wtouchln(self->win, start, count, changed), "touchline"); + rtn = wtouchln(self->win, start, count, changed); + funcname = "wtouchln"; } + return curses_window_check_err(self, rtn, funcname, "touchline"); } /*[clinic input] @@ -2593,10 +2757,13 @@ _curses_window_vline_impl(PyCursesWindowObject *self, int group_left_1, if (!PyCurses_ConvertToChtype(self, ch, &ch_)) return NULL; if (group_left_1) { - if (wmove(self->win, y, x) == ERR) - return PyCursesCheckERR_ForWin(self, ERR, "wmove"); + if (wmove(self->win, y, x) == ERR) { + curses_window_set_error(self, "wmove", "vline"); + return NULL; + } } - return PyCursesCheckERR_ForWin(self, wvline(self->win, ch_ | (attr_t)attr, n), "vline"); + int rtn = wvline(self->win, ch_ | (attr_t)attr, n); + return curses_window_check_err(self, rtn, "wvline", "vline"); } static PyObject * @@ -2761,26 +2928,39 @@ static PyType_Spec PyCursesWindow_Type_spec = { /* -------------------------------------------------------*/ -/* Function Body Macros - They are ugly but very, very useful. ;-) - - X - function name - TYPE - parameter Type - ERGSTR - format string for construction of the return value - PARSESTR - format string for argument parsing - */ +/* + * Macros for implementing simple module's methods. + * + * Parameters + * + * X The name of the curses C function or macro to invoke. + * FLAG When false, prefixes the function name with 'no' at runtime, + * This parameter is present in the signature and auto-generated + * by Argument Clinic. + * + * These macros should only be used for generating the body of + * the module's methods since they need a module reference. + */ #define NoArgNoReturnFunctionBody(X) \ { \ PyCursesStatefulInitialised(module); \ - return PyCursesCheckERR(module, X(), # X); } - -#define NoArgOrFlagNoReturnFunctionBody(X, flag) \ -{ \ - PyCursesStatefulInitialised(module); \ - if (flag) \ - return PyCursesCheckERR(module, X(), # X); \ - else \ - return PyCursesCheckERR(module, no ## X(), # X); \ + return curses_check_err(module, X(), # X, NULL); } + +#define NoArgOrFlagNoReturnFunctionBody(X, FLAG) \ +{ \ + PyCursesStatefulInitialised(module); \ + int rtn; \ + const char *funcname; \ + if (FLAG) { \ + rtn = X(); \ + funcname = # X; \ + } \ + else { \ + rtn = no ## X(); \ + funcname = "no" # X; \ + } \ + return curses_check_err(module, rtn, funcname, # X); \ } #define NoArgReturnIntFunctionBody(X) \ @@ -2788,7 +2968,6 @@ static PyType_Spec PyCursesWindow_Type_spec = { PyCursesStatefulInitialised(module); \ return PyLong_FromLong((long) X()); } - #define NoArgReturnStringFunctionBody(X) \ { \ PyCursesStatefulInitialised(module); \ @@ -2903,9 +3082,8 @@ _curses_color_content_impl(PyObject *module, int color_number) PyCursesStatefulInitialisedColor(module); if (_COLOR_CONTENT_FUNC(color_number, &r, &g, &b) == ERR) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_Format(state->error, "%s() returned ERR", - Py_STRINGIFY(_COLOR_CONTENT_FUNC)); + const char *funcname = Py_STRINGIFY(_COLOR_CONTENT_FUNC); + curses_set_error(module, funcname, "color_content"); return NULL; } @@ -2959,7 +3137,10 @@ _curses_curs_set_impl(PyObject *module, int visibility) PyCursesStatefulInitialised(module); erg = curs_set(visibility); - if (erg == ERR) return PyCursesCheckERR(module, erg, "curs_set"); + if (erg == ERR) { + curses_set_error(module, "curs_set", NULL); + return NULL; + } return PyLong_FromLong((long) erg); } @@ -3010,7 +3191,7 @@ _curses_delay_output_impl(PyObject *module, int ms) { PyCursesStatefulInitialised(module); - return PyCursesCheckERR(module, delay_output(ms), "delay_output"); + return curses_check_err(module, delay_output(ms), "delay_output", NULL); } /*[clinic input] @@ -3143,8 +3324,7 @@ _curses_getmouse_impl(PyObject *module) rtn = getmouse( &event ); if (rtn == ERR) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, "getmouse() returned ERR"); + curses_set_error(module, "getmouse", NULL); return NULL; } return Py_BuildValue("(hiiik)", @@ -3182,7 +3362,7 @@ _curses_ungetmouse_impl(PyObject *module, short id, int x, int y, int z, event.y = y; event.z = z; event.bstate = bstate; - return PyCursesCheckERR(module, ungetmouse(&event), "ungetmouse"); + return curses_check_err(module, ungetmouse(&event), "ungetmouse", NULL); } #endif @@ -3238,8 +3418,7 @@ _curses_getwin(PyObject *module, PyObject *file) fseek(fp, 0, 0); win = getwin(fp); if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, catchall_NULL); + curses_set_null_error(module, "getwin", NULL); goto error; } cursesmodule_state *state = get_cursesmodule_state(module); @@ -3268,7 +3447,7 @@ _curses_halfdelay_impl(PyObject *module, unsigned char tenths) { PyCursesStatefulInitialised(module); - return PyCursesCheckERR(module, halfdelay(tenths), "halfdelay"); + return curses_check_err(module, halfdelay(tenths), "halfdelay", NULL); } /*[clinic input] @@ -3353,9 +3532,10 @@ _curses_init_color_impl(PyObject *module, int color_number, short r, short g, PyCursesStatefulInitialised(module); PyCursesStatefulInitialisedColor(module); - return PyCursesCheckERR(module, + return curses_check_err(module, _CURSES_INIT_COLOR_FUNC(color_number, r, g, b), - Py_STRINGIFY(_CURSES_INIT_COLOR_FUNC)); + Py_STRINGIFY(_CURSES_INIT_COLOR_FUNC), + NULL); } /*[clinic input] @@ -3389,9 +3569,8 @@ _curses_init_pair_impl(PyObject *module, int pair_number, int fg, int bg) COLOR_PAIRS - 1); } else { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_Format(state->error, "%s() returned ERR", - Py_STRINGIFY(_CURSES_INIT_PAIR_FUNC)); + const char *funcname = Py_STRINGIFY(_CURSES_INIT_PAIR_FUNC); + curses_set_error(module, funcname, "init_pair"); } return NULL; } @@ -3422,8 +3601,7 @@ _curses_initscr_impl(PyObject *module) win = initscr(); if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, catchall_NULL); + curses_set_null_error(module, "initscr", NULL); return NULL; } @@ -3628,7 +3806,7 @@ _curses_set_escdelay_impl(PyObject *module, int ms) return NULL; } - return PyCursesCheckERR(module, set_escdelay(ms), "set_escdelay"); + return curses_check_err(module, set_escdelay(ms), "set_escdelay", NULL); } /*[clinic input] @@ -3667,7 +3845,7 @@ _curses_set_tabsize_impl(PyObject *module, int size) return NULL; } - return PyCursesCheckERR(module, set_tabsize(size), "set_tabsize"); + return curses_check_err(module, set_tabsize(size), "set_tabsize", NULL); } #endif @@ -3685,7 +3863,7 @@ _curses_intrflush_impl(PyObject *module, int flag) { PyCursesStatefulInitialised(module); - return PyCursesCheckERR(module, intrflush(NULL, flag), "intrflush"); + return curses_check_err(module, intrflush(NULL, flag), "intrflush", NULL); } /*[clinic input] @@ -3798,7 +3976,7 @@ _curses_meta_impl(PyObject *module, int yes) { PyCursesStatefulInitialised(module); - return PyCursesCheckERR(module, meta(stdscr, yes), "meta"); + return curses_check_err(module, meta(stdscr, yes), "meta", NULL); } #ifdef NCURSES_MOUSE_VERSION @@ -3821,8 +3999,12 @@ _curses_mouseinterval_impl(PyObject *module, int interval) /*[clinic end generated code: output=c4f5ff04354634c5 input=75aaa3f0db10ac4e]*/ { PyCursesStatefulInitialised(module); - - return PyCursesCheckERR(module, mouseinterval(interval), "mouseinterval"); + int value = mouseinterval(interval); + if (value == ERR) { + curses_set_error(module, "mouseinterval", NULL); + return NULL; + } + return PyLong_FromLong(value); } /*[clinic input] @@ -3898,8 +4080,7 @@ _curses_newpad_impl(PyObject *module, int nlines, int ncols) win = newpad(nlines, ncols); if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, catchall_NULL); + curses_set_null_error(module, "newpad", NULL); return NULL; } @@ -3939,8 +4120,7 @@ _curses_newwin_impl(PyObject *module, int nlines, int ncols, win = newwin(nlines,ncols,begin_y,begin_x); if (win == NULL) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, catchall_NULL); + curses_set_null_error(module, "newwin", NULL); return NULL; } @@ -4059,9 +4239,8 @@ _curses_pair_content_impl(PyObject *module, int pair_number) COLOR_PAIRS - 1); } else { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_Format(state->error, "%s() returned ERR", - Py_STRINGIFY(_CURSES_PAIR_CONTENT_FUNC)); + const char *funcname = Py_STRINGIFY(_CURSES_PAIR_CONTENT_FUNC); + curses_set_error(module, funcname, "pair_content"); } return NULL; } @@ -4105,7 +4284,7 @@ static PyObject * _curses_putp_impl(PyObject *module, const char *string) /*[clinic end generated code: output=e98081d1b8eb5816 input=1601faa828b44cb3]*/ { - return PyCursesCheckERR(module, putp(string), "putp"); + return curses_check_err(module, putp(string), "putp", NULL); } /*[clinic input] @@ -4279,10 +4458,12 @@ _curses_resizeterm_impl(PyObject *module, short nlines, short ncols) /*[clinic end generated code: output=4de3abab50c67f02 input=414e92a63e3e9899]*/ { PyObject *result; + int code; PyCursesStatefulInitialised(module); - result = PyCursesCheckERR(module, resizeterm(nlines, ncols), "resizeterm"); + code = resizeterm(nlines, ncols); + result = curses_check_err(module, code, "resizeterm", NULL); if (!result) return NULL; if (!update_lines_cols(module)) { @@ -4318,10 +4499,12 @@ _curses_resize_term_impl(PyObject *module, short nlines, short ncols) /*[clinic end generated code: output=46c6d749fa291dbd input=276afa43d8ea7091]*/ { PyObject *result; + int code; PyCursesStatefulInitialised(module); - result = PyCursesCheckERR(module, resize_term(nlines, ncols), "resize_term"); + code = resize_term(nlines, ncols); + result = curses_check_err(module, code, "resize_term", NULL); if (!result) return NULL; if (!update_lines_cols(module)) { @@ -4390,8 +4573,7 @@ _curses_start_color_impl(PyObject *module) PyCursesStatefulInitialised(module); if (start_color() == ERR) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, "start_color() returned ERR"); + curses_set_error(module, "start_color", NULL); return NULL; } @@ -4543,8 +4725,7 @@ _curses_tparm_impl(PyObject *module, const char *str, int i1, int i2, int i3, result = tparm((char *)str,i1,i2,i3,i4,i5,i6,i7,i8,i9); if (!result) { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, "tparm() returned NULL"); + curses_set_null_error(module, "tparm", NULL); return NULL; } @@ -4570,7 +4751,7 @@ _curses_typeahead_impl(PyObject *module, int fd) { PyCursesStatefulInitialised(module); - return PyCursesCheckERR(module, typeahead( fd ), "typeahead"); + return curses_check_err(module, typeahead(fd), "typeahead", NULL); } #endif @@ -4620,7 +4801,7 @@ _curses_ungetch(PyObject *module, PyObject *ch) if (!PyCurses_ConvertToChtype(NULL, ch, &ch_)) return NULL; - return PyCursesCheckERR(module, ungetch(ch_), "ungetch"); + return curses_check_err(module, ungetch(ch_), "ungetch", NULL); } #ifdef HAVE_NCURSESW @@ -4690,7 +4871,7 @@ _curses_unget_wch(PyObject *module, PyObject *ch) if (!PyCurses_ConvertToWchar_t(ch, &wch)) return NULL; - return PyCursesCheckERR(module, unget_wch(wch), "unget_wch"); + return curses_check_err(module, unget_wch(wch), "unget_wch", NULL); } #endif @@ -4739,13 +4920,7 @@ _curses_use_default_colors_impl(PyObject *module) PyCursesStatefulInitialisedColor(module); code = use_default_colors(); - if (code != ERR) { - Py_RETURN_NONE; - } else { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, "use_default_colors() returned ERR"); - return NULL; - } + return curses_check_err(module, code, "use_default_colors", NULL); } /*[clinic input] From b22460c44d1bc597c96d4a3d27ad8373d7952820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Pelletier?= Date: Mon, 19 May 2025 10:10:17 -0400 Subject: [PATCH 199/989] gh-125225: Fix column misalignment in help('topics') output (gh-125226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'help("topics")' output was misaligned due to "ASSIGNMENTEXPRESSIONS" exceeding the implicit maximum default column width of 19 characters. Reduced the number of columns from 4 to 3 in the listtopics() function to allow more space for longer topic names. Co-authored-by: Łukasz Langa --- Lib/pydoc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index def76d076a2989..7528178fdcae97 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2110,7 +2110,7 @@ def intro(self): self.output.write(_introdoc()) def list(self, items, columns=4, width=80): - items = list(sorted(items)) + items = sorted(items) colw = width // columns rows = (len(items) + columns - 1) // columns for row in range(rows): @@ -2142,7 +2142,7 @@ def listtopics(self): Here is a list of available topics. Enter any topic name to get more help. ''') - self.list(self.topics.keys()) + self.list(self.topics.keys(), columns=3) def showtopic(self, topic, more_xrefs=''): try: From 71ea6a6798c5853ed33188865a73b044ede8aba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Simon?= Date: Mon, 19 May 2025 16:12:23 +0200 Subject: [PATCH 200/989] gh-134158: Fix PyREPL coloring of double braces in f/t-strings (gh-134159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Loïc Simon Co-authored-by: Peter Bierma Co-authored-by: Łukasz Langa --- Lib/_pyrepl/utils.py | 8 ++++- Lib/test/test_pyrepl/test_reader.py | 31 +++++++++++++++++++ ...-05-17-20-44-51.gh-issue-134158.ewLNLp.rst | 1 + 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-44-51.gh-issue-134158.ewLNLp.rst diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 752049ac05acdf..e04fbdc6c8a5c4 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -41,9 +41,15 @@ def from_re(cls, m: Match[str], group: int | str) -> Self: @classmethod def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + return cls( line_len[token.start[0] - 1] + token.start[1], - line_len[token.end[0] - 1] + token.end[1] - 1, + line_len[token.end[0] - 1] + token.end[1] + end_offset, ) diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 57526f88f9384b..1f655264f1c00a 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -517,6 +517,37 @@ def unfinished_function(): self.assert_screen_equal(reader, code, clean=True) self.assert_screen_equal(reader, expected) + def test_syntax_highlighting_literal_brace_in_fstring_or_tstring(self): + code = dedent( + """\ + f"{{" + f"}}" + f"a{{b" + f"a}}b" + f"a{{b}}c" + t"a{{b}}c" + f"{{{0}}}" + f"{ {0} }" + """ + ) + expected = dedent( + """\ + {s}f"{z}{s}<<{z}{s}"{z} + {s}f"{z}{s}>>{z}{s}"{z} + {s}f"{z}{s}a<<{z}{s}b{z}{s}"{z} + {s}f"{z}{s}a>>{z}{s}b{z}{s}"{z} + {s}f"{z}{s}a<<{z}{s}b>>{z}{s}c{z}{s}"{z} + {s}t"{z}{s}a<<{z}{s}b>>{z}{s}c{z}{s}"{z} + {s}f"{z}{s}<<{z}{o}<{z}{n}0{z}{o}>{z}{s}>>{z}{s}"{z} + {s}f"{z}{o}<{z} {o}<{z}{n}0{z}{o}>{z} {o}>{z}{s}"{z} + """ + ).format(**colors).replace("<", "{").replace(">", "}") + events = code_to_events(code) + reader, _ = handle_all_events(events) + self.assert_screen_equal(reader, code, clean=True) + self.maxDiff=None + self.assert_screen_equal(reader, expected) + def test_control_characters(self): code = 'flag = "🏳️‍🌈"' events = code_to_events(code) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-44-51.gh-issue-134158.ewLNLp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-44-51.gh-issue-134158.ewLNLp.rst new file mode 100644 index 00000000000000..7b8bab739c37c8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-44-51.gh-issue-134158.ewLNLp.rst @@ -0,0 +1 @@ +Fix coloring of double braces in f-strings and t-strings in the :term:`REPL`. From cc9add695da42defc72e62c5d5389621dac54b2b Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 19 May 2025 10:15:16 -0400 Subject: [PATCH 201/989] gh-128045: Mark unknown opcodes as deopting to themselves (#128044) * Mark unknown opcodes as deopting to themselves --- Include/internal/pycore_opcode_metadata.h | 32 +++++++++++++++++++ .../opcode_metadata_generator.py | 7 ++++ 2 files changed, 39 insertions(+) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index e55e26783a6200..71859476e4f901 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1787,6 +1787,38 @@ const uint8_t _PyOpcode_Caches[256] = { extern const uint8_t _PyOpcode_Deopt[256]; #ifdef NEED_OPCODE_METADATA const uint8_t _PyOpcode_Deopt[256] = { + [119] = 119, + [120] = 120, + [121] = 121, + [122] = 122, + [123] = 123, + [124] = 124, + [125] = 125, + [126] = 126, + [127] = 127, + [211] = 211, + [212] = 212, + [213] = 213, + [214] = 214, + [215] = 215, + [216] = 216, + [217] = 217, + [218] = 218, + [219] = 219, + [220] = 220, + [221] = 221, + [222] = 222, + [223] = 223, + [224] = 224, + [225] = 225, + [226] = 226, + [227] = 227, + [228] = 228, + [229] = 229, + [230] = 230, + [231] = 231, + [232] = 232, + [233] = 233, [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 620e4b6f1f4a69..10567204dcc599 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -157,6 +157,13 @@ def generate_deopt_table(analysis: Analysis, out: CWriter) -> None: if inst.family is not None: deopt = inst.family.name deopts.append((inst.name, deopt)) + defined = set(analysis.opmap.values()) + for i in range(256): + if i not in defined: + deopts.append((f'{i}', f'{i}')) + + assert len(deopts) == 256 + assert len(set(x[0] for x in deopts)) == 256 for name, deopt in sorted(deopts): out.emit(f"[{name}] = {deopt},\n") out.emit("};\n\n") From faebf87b3716f7103ee5410456972db36f4b3ada Mon Sep 17 00:00:00 2001 From: Jessica Temporal Date: Mon, 19 May 2025 10:26:04 -0400 Subject: [PATCH 202/989] gh-134214: Fix test case in pyrepl (gh-134223) --- Lib/test/test_pyrepl/test_pyrepl.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index f80755b3368f8f..a1cfbe4c3cadb7 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1055,11 +1055,15 @@ def test_parse(self): self.assertEqual(actual, parsed) # The parser should not get tripped up by any # other preceding statements - code = f'import xyz\n{code}' - with self.subTest(code=code): + _code = f'import xyz\n{code}' + parser = ImportParser(_code) + actual = parser.parse() + with self.subTest(code=_code): self.assertEqual(actual, parsed) - code = f'import xyz;{code}' - with self.subTest(code=code): + _code = f'import xyz;{code}' + parser = ImportParser(_code) + actual = parser.parse() + with self.subTest(code=_code): self.assertEqual(actual, parsed) def test_parse_error(self): From de70614c1387636c8f835ee3e4eb36c8ae721f21 Mon Sep 17 00:00:00 2001 From: Duprat Date: Mon, 19 May 2025 16:36:09 +0200 Subject: [PATCH 203/989] gh-132795: Add docs for `multiprocessing.Semaphore.locked` (#133299) --- Doc/library/multiprocessing.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 6c43d5fe166e2f..80e33c4a1df015 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1369,6 +1369,12 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + .. note:: On macOS, this is indistinguishable from :class:`Semaphore` because ``sem_getvalue()`` is not implemented on that platform. @@ -1521,6 +1527,12 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + .. note:: On macOS, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with From 3fa30d9e9c13c4b84bdc9fc04e33130775679e98 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 19 May 2025 11:08:50 -0400 Subject: [PATCH 204/989] gh-128045: Syncs w/ latest opcode metadata (#134231) Fix opcode metadata --- Include/internal/pycore_opcode_metadata.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 71859476e4f901..17811d301a92c3 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1787,8 +1787,6 @@ const uint8_t _PyOpcode_Caches[256] = { extern const uint8_t _PyOpcode_Deopt[256]; #ifdef NEED_OPCODE_METADATA const uint8_t _PyOpcode_Deopt[256] = { - [119] = 119, - [120] = 120, [121] = 121, [122] = 122, [123] = 123, @@ -1796,7 +1794,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [125] = 125, [126] = 126, [127] = 127, - [211] = 211, [212] = 212, [213] = 213, [214] = 214, From c45e661226558e997e265cf53ce1419213cc10b7 Mon Sep 17 00:00:00 2001 From: naya451 <41294408+naya451@users.noreply.github.com> Date: Mon, 19 May 2025 18:10:23 +0300 Subject: [PATCH 205/989] gh-131505: Move len boundary assertions before using len. (#131536) Move len boundary assertions before using len. --- Modules/_io/bytesio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 4cde5f87032a3f..61cfec435fe60f 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -607,9 +607,9 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) len = 0; } - memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); assert(self->pos + len < PY_SSIZE_T_MAX); assert(len >= 0); + memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); self->pos += len; return PyLong_FromSsize_t(len); From a36ce264a9b6b0a18b78d3b0d79d2b11a685b801 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 19 May 2025 18:39:43 +0300 Subject: [PATCH 206/989] GH-134236: make regen-all (GH-134237) From c4ad92e1555404b54490d1e370788509120675ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Kiss=20Koll=C3=A1r?= Date: Mon, 19 May 2025 12:07:39 -0400 Subject: [PATCH 207/989] Fix typo in get_stack_trace docstring (#134246) --- Modules/_remote_debugging_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 9314ddd9bed5d7..42db93bb5ead0f 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1765,7 +1765,7 @@ get_async_stack_trace(PyObject* self, PyObject* args) static PyMethodDef methods[] = { {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given pod"}, + "Get the Python stack from a given pid"}, {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, "Get the asyncio stack from a given pid"}, {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, From 9859791f9e116c827468f307ac0770286c975c8b Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Mon, 19 May 2025 12:24:08 -0400 Subject: [PATCH 208/989] gh-128639: Don't assume one thread in subinterpreter finalization (gh-128640) Incidentally, this also fixed the warning not showing up if a subinterpreter wasn't cleaned up via _interpreters.destroy. I had to update some of the tests as a result. --- Lib/test/test_interpreters/test_api.py | 60 ++++++++++++++++++- Lib/test/test_interpreters/test_lifecycle.py | 6 +- Lib/test/test_threading.py | 5 +- ...-01-08-12-52-47.gh-issue-128640.9nbh9z.rst | 1 + Programs/_testembed.c | 9 ++- Python/pylifecycle.c | 56 ++++++++--------- 6 files changed, 99 insertions(+), 38 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 1e2d572b1cbb81..c7ee114fe0838c 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -647,6 +647,59 @@ def test_created_with_capi(self): self.interp_exists(interpid)) + def test_remaining_threads(self): + r_interp, w_interp = self.pipe() + + FINISHED = b'F' + + # It's unlikely, but technically speaking, it's possible + # that the thread could've finished before interp.close() is + # reached, so this test might not properly exercise the case. + # However, it's quite unlikely and I'm too lazy to deal with it. + interp = interpreters.create() + interp.exec(f"""if True: + import os + import threading + import time + + def task(): + time.sleep(1) + os.write({w_interp}, {FINISHED!r}) + + threads = [threading.Thread(target=task) for _ in range(3)] + for t in threads: + t.start() + """) + interp.close() + + self.assertEqual(os.read(r_interp, 1), FINISHED) + + def test_remaining_daemon_threads(self): + interp = _interpreters.create( + types.SimpleNamespace( + use_main_obmalloc=False, + allow_fork=False, + allow_exec=False, + allow_threads=True, + allow_daemon_threads=True, + check_multi_interp_extensions=True, + gil='own', + ) + ) + _interpreters.exec(interp, f"""if True: + import threading + import time + + def task(): + time.sleep(100) + + threads = [threading.Thread(target=task, daemon=True) for _ in range(3)] + for t in threads: + t.start() + """) + _interpreters.destroy(interp) + + class TestInterpreterPrepareMain(TestBase): def test_empty(self): @@ -755,7 +808,10 @@ def script(): spam.eggs() interp = interpreters.create() - interp.exec(script) + try: + interp.exec(script) + finally: + interp.close() """) stdout, stderr = self.assert_python_failure(scriptfile) @@ -764,7 +820,7 @@ def script(): # File "{interpreters.__file__}", line 179, in exec self.assertEqual(stderr, dedent(f"""\ Traceback (most recent call last): - File "{scriptfile}", line 9, in + File "{scriptfile}", line 10, in interp.exec(script) ~~~~~~~~~~~^^^^^^^^ {interpmod_line.strip()} diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index ac24f6568acd95..3f9ed1fb501522 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -132,6 +132,7 @@ def test_sys_path_0(self): 'sub': sys.path[0], }}, indent=4), flush=True) """) + interp.close() ''' # / # pkg/ @@ -172,7 +173,10 @@ def test_gh_109793(self): argv = [sys.executable, '-c', '''if True: from test.support import interpreters interp = interpreters.create() - raise Exception + try: + raise Exception + finally: + interp.close() '''] proc = subprocess.run(argv, capture_output=True, text=True) self.assertIn('Traceback', proc.stderr) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index b6e2d419019aa1..127ef658673612 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1689,10 +1689,7 @@ def f(): _testcapi.run_in_subinterp(%r) """ % (subinterp_code,) - with test.support.SuppressCrashReport(): - rc, out, err = assert_python_failure("-c", script) - self.assertIn("Fatal Python error: Py_EndInterpreter: " - "not the last thread", err.decode()) + assert_python_ok("-c", script) def _check_allowed(self, before_start='', *, allowed=True, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst new file mode 100644 index 00000000000000..040c6d56c47244 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst @@ -0,0 +1 @@ +Fix a crash when using threads inside of a subinterpreter. diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 8e0e330f6605c9..0a19d1126e246e 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1395,9 +1395,12 @@ static int test_audit_subinterpreter(void) PySys_AddAuditHook(_audit_subinterpreter_hook, NULL); _testembed_initialize(); - Py_NewInterpreter(); - Py_NewInterpreter(); - Py_NewInterpreter(); + PyThreadState *tstate = PyThreadState_Get(); + for (int i = 0; i < 3; ++i) + { + Py_EndInterpreter(Py_NewInterpreter()); + PyThreadState_Swap(tstate); + } Py_Finalize(); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8394245d373030..b6bc2ea5211460 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1992,6 +1992,7 @@ resolve_final_tstate(_PyRuntimeState *runtime) } else { /* Fall back to the current tstate. It's better than nothing. */ + // XXX No it's not main_tstate = tstate; } } @@ -2037,6 +2038,16 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyAtExit_Call(tstate->interp); + /* Clean up any lingering subinterpreters. + + Two preconditions need to be met here: + + - This has to happen before _PyRuntimeState_SetFinalizing is + called, or else threads might get prematurely blocked. + - The world must not be stopped, as finalizers can run. + */ + finalize_subinterpreters(); + assert(_PyThreadState_GET() == tstate); /* Copy the core config, PyInterpreterState_Delete() free @@ -2124,9 +2135,6 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyImport_FiniExternal(tstate->interp); finalize_modules(tstate); - /* Clean up any lingering subinterpreters. */ - finalize_subinterpreters(); - /* Print debug stats if any */ _PyEval_Fini(); @@ -2410,9 +2418,8 @@ Py_NewInterpreter(void) return tstate; } -/* Delete an interpreter and its last thread. This requires that the - given thread state is current, that the thread has no remaining - frames, and that it is its interpreter's only remaining thread. +/* Delete an interpreter. This requires that the given thread state + is current, and that the thread has no remaining frames. It is a fatal error to violate these constraints. (Py_FinalizeEx() doesn't have these constraints -- it zaps @@ -2442,14 +2449,15 @@ Py_EndInterpreter(PyThreadState *tstate) _Py_FinishPendingCalls(tstate); _PyAtExit_Call(tstate->interp); - - if (tstate != interp->threads.head || tstate->next != NULL) { - Py_FatalError("not the last thread"); - } + _PyRuntimeState *runtime = interp->runtime; + _PyEval_StopTheWorldAll(runtime); + PyThreadState *list = _PyThreadState_RemoveExcept(tstate); /* Remaining daemon threads will automatically exit when they attempt to take the GIL (ex: PyEval_RestoreThread()). */ _PyInterpreterState_SetFinalizing(interp, tstate); + _PyEval_StartTheWorldAll(runtime); + _PyThreadState_DeleteList(list, /*is_after_fork=*/0); // XXX Call something like _PyImport_Disable() here? @@ -2480,6 +2488,8 @@ finalize_subinterpreters(void) PyInterpreterState *main_interp = _PyInterpreterState_Main(); assert(final_tstate->interp == main_interp); _PyRuntimeState *runtime = main_interp->runtime; + assert(!runtime->stoptheworld.world_stopped); + assert(_PyRuntimeState_GetFinalizing(runtime) == NULL); struct pyinterpreters *interpreters = &runtime->interpreters; /* Get the first interpreter in the list. */ @@ -2508,27 +2518,17 @@ finalize_subinterpreters(void) /* Clean up all remaining subinterpreters. */ while (interp != NULL) { - assert(!_PyInterpreterState_IsRunningMain(interp)); - - /* Find the tstate to use for fini. We assume the interpreter - will have at most one tstate at this point. */ - PyThreadState *tstate = interp->threads.head; - if (tstate != NULL) { - /* Ideally we would be able to use tstate as-is, and rely - on it being in a ready state: no exception set, not - running anything (tstate->current_frame), matching the - current thread ID (tstate->thread_id). To play it safe, - we always delete it and use a fresh tstate instead. */ - assert(tstate != final_tstate); - _PyThreadState_Attach(tstate); - PyThreadState_Clear(tstate); - _PyThreadState_Detach(tstate); - PyThreadState_Delete(tstate); + /* Make a tstate for finalization. */ + PyThreadState *tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); + if (tstate == NULL) { + // XXX Some graceful way to always get a thread state? + Py_FatalError("thread state allocation failed"); } - tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); - /* Destroy the subinterpreter. */ + /* Enter the subinterpreter. */ _PyThreadState_Attach(tstate); + + /* Destroy the subinterpreter. */ Py_EndInterpreter(tstate); assert(_PyThreadState_GET() == NULL); From 8d490b368766ee7b28d2ccf47704b1d4b5f1ea23 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Mon, 19 May 2025 13:19:24 -0400 Subject: [PATCH 209/989] GH-131798: Narrow the return type of isinstance for some known arguments in the JIT (GH-133172) --- Lib/test/test_capi/test_opt.py | 115 ++++++++++++++++++ ...-04-26-17-50-01.gh-issue-131798.XiOgw5.rst | 2 + Python/optimizer_bytecodes.c | 20 +++ Python/optimizer_cases.c.h | 16 ++- 4 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-26-17-50-01.gh-issue-131798.XiOgw5.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 651148336f7f2c..73ce55400adcdf 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1959,6 +1959,121 @@ def testfunc(n): self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) + def test_call_isinstance_is_true(self): + def testfunc(n): + x = 0 + for _ in range(n): + y = isinstance(42, int) + if y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertNotIn("_GUARD_IS_TRUE_POP", uops) + + def test_call_isinstance_is_false(self): + def testfunc(n): + x = 0 + for _ in range(n): + y = isinstance(42, str) + if not y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertNotIn("_GUARD_IS_FALSE_POP", uops) + + def test_call_isinstance_subclass(self): + def testfunc(n): + x = 0 + for _ in range(n): + y = isinstance(True, int) + if y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertNotIn("_GUARD_IS_TRUE_POP", uops) + + def test_call_isinstance_unknown_object(self): + def testfunc(n): + x = 0 + for _ in range(n): + # The optimizer doesn't know the return type here: + bar = eval("42") + # This will only narrow to bool: + y = isinstance(bar, int) + if y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertIn("_GUARD_IS_TRUE_POP", uops) + + def test_call_isinstance_tuple_of_classes(self): + def testfunc(n): + x = 0 + for _ in range(n): + # A tuple of classes is currently not optimized, + # so this is only narrowed to bool: + y = isinstance(42, (int, str)) + if y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertIn("_GUARD_IS_TRUE_POP", uops) + + def test_call_isinstance_metaclass(self): + class EvenNumberMeta(type): + def __instancecheck__(self, number): + return number % 2 == 0 + + class EvenNumber(metaclass=EvenNumberMeta): + pass + + def testfunc(n): + x = 0 + for _ in range(n): + # Only narrowed to bool + y = isinstance(42, EvenNumber) + if y: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_TO_BOOL_BOOL", uops) + self.assertIn("_GUARD_IS_TRUE_POP", uops) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-26-17-50-01.gh-issue-131798.XiOgw5.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-26-17-50-01.gh-issue-131798.XiOgw5.rst new file mode 100644 index 00000000000000..45ab1bea6b1dd7 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-26-17-50-01.gh-issue-131798.XiOgw5.rst @@ -0,0 +1,2 @@ +Narrow the return type and constant-evaluate ``CALL_ISINSTANCE`` for a +subset of known values in the JIT. Patch by Tomas Roun diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 7c160cdcb0c149..ad25c68e62aab2 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -890,6 +890,26 @@ dummy_func(void) { } } + op(_CALL_ISINSTANCE, (unused, unused, instance, cls -- res)) { + // the result is always a bool, but sometimes we can + // narrow it down to True or False + res = sym_new_type(ctx, &PyBool_Type); + PyTypeObject *inst_type = sym_get_type(instance); + PyTypeObject *cls_o = (PyTypeObject *)sym_get_const(ctx, cls); + if (inst_type && cls_o && sym_matches_type(cls, &PyType_Type)) { + // isinstance(inst, cls) where both inst and cls have + // known types, meaning we can deduce either True or False + + // The below check is equivalent to PyObject_TypeCheck(inst, cls) + if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { + sym_set_const(res, Py_True); + } + else { + sym_set_const(res, Py_False); + } + } + } + op(_GUARD_IS_TRUE_POP, (flag -- )) { if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index deb912662e4b0b..5d469765203833 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2124,8 +2124,22 @@ } case _CALL_ISINSTANCE: { + JitOptSymbol *cls; + JitOptSymbol *instance; JitOptSymbol *res; - res = sym_new_not_null(ctx); + cls = stack_pointer[-1]; + instance = stack_pointer[-2]; + res = sym_new_type(ctx, &PyBool_Type); + PyTypeObject *inst_type = sym_get_type(instance); + PyTypeObject *cls_o = (PyTypeObject *)sym_get_const(ctx, cls); + if (inst_type && cls_o && sym_matches_type(cls, &PyType_Type)) { + if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { + sym_set_const(res, Py_True); + } + else { + sym_set_const(res, Py_False); + } + } stack_pointer[-4] = res; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); From 71c42b778dfc0831734bb7bc6121ffd44beae1d3 Mon Sep 17 00:00:00 2001 From: Semyon Moroz Date: Mon, 19 May 2025 18:07:11 +0000 Subject: [PATCH 210/989] gh-126883: Add check that timezone fields are in range for `datetime.fromisoformat` (#127242) It was previously possible to specify things like `+00:90:00` which would be equivalent to `+01:30:00`, but is not a valid ISO8601 string. --------- Co-authored-by: Erlend E. Aasland Co-authored-by: Paul Ganssle <1377457+pganssle@users.noreply.github.com> --- Lib/_pydatetime.py | 44 +++++++++++++++---- Lib/test/datetimetester.py | 9 ++++ Misc/ACKS | 1 + ...-11-25-10-22-08.gh-issue-126883.MAEF7g.rst | 3 ++ Modules/_datetimemodule.c | 15 +++++++ 5 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-25-10-22-08.gh-issue-126883.MAEF7g.rst diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 471e89c16a1c0e..71f619024e570d 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -467,6 +467,7 @@ def _parse_isoformat_time(tstr): hour, minute, second, microsecond = time_comps became_next_day = False error_from_components = False + error_from_tz = None if (hour == 24): if all(time_comp == 0 for time_comp in time_comps[1:]): hour = 0 @@ -500,14 +501,22 @@ def _parse_isoformat_time(tstr): else: tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 - td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], - seconds=tz_comps[2], microseconds=tz_comps[3]) - - tzi = timezone(tzsign * td) + try: + # This function is intended to validate datetimes, but because + # we restrict time zones to ±24h, it serves here as well. + _check_time_fields(hour=tz_comps[0], minute=tz_comps[1], + second=tz_comps[2], microsecond=tz_comps[3], + fold=0) + except ValueError as e: + error_from_tz = e + else: + td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], + seconds=tz_comps[2], microseconds=tz_comps[3]) + tzi = timezone(tzsign * td) time_comps.append(tzi) - return time_comps, became_next_day, error_from_components + return time_comps, became_next_day, error_from_components, error_from_tz # tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar def _isoweek_to_gregorian(year, week, day): @@ -1633,9 +1642,21 @@ def fromisoformat(cls, time_string): time_string = time_string.removeprefix('T') try: - return cls(*_parse_isoformat_time(time_string)[0]) - except Exception: - raise ValueError(f'Invalid isoformat string: {time_string!r}') + time_components, _, error_from_components, error_from_tz = ( + _parse_isoformat_time(time_string) + ) + except ValueError: + raise ValueError( + f'Invalid isoformat string: {time_string!r}') from None + else: + if error_from_tz: + raise error_from_tz + if error_from_components: + raise ValueError( + "Minute, second, and microsecond must be 0 when hour is 24" + ) + + return cls(*time_components) def strftime(self, format): """Format using strftime(). The date part of the timestamp passed @@ -1947,11 +1968,16 @@ def fromisoformat(cls, date_string): if tstr: try: - time_components, became_next_day, error_from_components = _parse_isoformat_time(tstr) + (time_components, + became_next_day, + error_from_components, + error_from_tz) = _parse_isoformat_time(tstr) except ValueError: raise ValueError( f'Invalid isoformat string: {date_string!r}') from None else: + if error_from_tz: + raise error_from_tz if error_from_components: raise ValueError("minute, second, and microsecond must be 0 when hour is 24") diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index d1882a310bbbb0..345698cfb5f1a4 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3571,6 +3571,10 @@ def test_fromisoformat_fails_datetime(self): '2009-04-19T12:30:45.400 +02:30', # Space between ms and timezone (gh-130959) '2009-04-19T12:30:45.400 ', # Trailing space (gh-130959) '2009-04-19T12:30:45. 400', # Space before fraction (gh-130959) + '2009-04-19T12:30:45+00:90:00', # Time zone field out from range + '2009-04-19T12:30:45+00:00:90', # Time zone field out from range + '2009-04-19T12:30:45-00:90:00', # Time zone field out from range + '2009-04-19T12:30:45-00:00:90', # Time zone field out from range ] for bad_str in bad_strs: @@ -4795,6 +4799,11 @@ def test_fromisoformat_fails(self): '12:30:45.400 +02:30', # Space between ms and timezone (gh-130959) '12:30:45.400 ', # Trailing space (gh-130959) '12:30:45. 400', # Space before fraction (gh-130959) + '24:00:00.000001', # Has non-zero microseconds on 24:00 + '24:00:01.000000', # Has non-zero seconds on 24:00 + '24:01:00.000000', # Has non-zero minutes on 24:00 + '12:30:45+00:90:00', # Time zone field out from range + '12:30:45+00:00:90', # Time zone field out from range ] for bad_str in bad_strs: diff --git a/Misc/ACKS b/Misc/ACKS index 610dcf9f4238de..5653c52c9e354e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1288,6 +1288,7 @@ Paul Moore Ross Moore Ben Morgan Emily Morehouse +Semyon Moroz Derek Morr James A Morrison Martin Morrison diff --git a/Misc/NEWS.d/next/Library/2024-11-25-10-22-08.gh-issue-126883.MAEF7g.rst b/Misc/NEWS.d/next/Library/2024-11-25-10-22-08.gh-issue-126883.MAEF7g.rst new file mode 100644 index 00000000000000..5e3fa39acf1979 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-25-10-22-08.gh-issue-126883.MAEF7g.rst @@ -0,0 +1,3 @@ +Add check that timezone fields are in range for +:meth:`datetime.datetime.fromisoformat` and +:meth:`datetime.time.fromisoformat`. Patch by Semyon Moroz. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 9bba0e3354b26b..313a72e3fe0668 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1088,6 +1088,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, // -3: Failed to parse time component // -4: Failed to parse time separator // -5: Malformed timezone string + // -6: Timezone fields are not in range const char *p = dtstr; const char *p_end = dtstr + dtlen; @@ -1134,6 +1135,11 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, rv = parse_hh_mm_ss_ff(tzinfo_pos, p_end, &tzhour, &tzminute, &tzsecond, tzmicrosecond); + // Check if timezone fields are in range + if (check_time_args(tzhour, tzminute, tzsecond, *tzmicrosecond, 0) < 0) { + return -6; + } + *tzoffset = tzsign * ((tzhour * 3600) + (tzminute * 60) + tzsecond); *tzmicrosecond *= tzsign; @@ -5039,6 +5045,9 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { &tzoffset, &tzimicrosecond); if (rv < 0) { + if (rv == -6) { + goto error; + } goto invalid_string_error; } @@ -5075,6 +5084,9 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { invalid_string_error: PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", tstr); return NULL; + +error: + return NULL; } @@ -5927,6 +5939,9 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) len -= (p - dt_ptr); rv = parse_isoformat_time(p, len, &hour, &minute, &second, µsecond, &tzoffset, &tzusec); + if (rv == -6) { + goto error; + } } if (rv < 0) { goto invalid_string_error; From e79f640eb698df7659c0ce81474d93bf222094c5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 19 May 2025 14:09:10 -0400 Subject: [PATCH 211/989] Simplify interp_look_up_id() (#134257) Don't use PyInterpreterState_GetID() but get directly the interpreter 'id' member which cannot fail. --- Python/pystate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index 14ae2748b0bc99..4757a8c3d1476c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1393,10 +1393,8 @@ interp_look_up_id(_PyRuntimeState *runtime, int64_t requested_id) { PyInterpreterState *interp = runtime->interpreters.head; while (interp != NULL) { - int64_t id = PyInterpreterState_GetID(interp); - if (id < 0) { - return NULL; - } + int64_t id = interp->id; + assert(id >= 0); if (requested_id == id) { return interp; } From 871d26987533e81ab63af067e1fc96aa37a26bf7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 19 May 2025 21:17:58 +0300 Subject: [PATCH 212/989] gh-117596: Add more tests for os.path with invalid paths (GH-134189) --- Lib/test/test_ntpath.py | 179 ++++++++++++++++++++++++++++++++++--- Lib/test/test_posixpath.py | 74 +++++++++++++++ 2 files changed, 243 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index c10387b58e3f9c..f83ef225a6e48e 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -124,6 +124,22 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + def test_splitdrive_invalid_paths(self): + splitdrive = ntpath.splitdrive + self.assertEqual(splitdrive('\\\\ser\x00ver\\sha\x00re\\di\x00r'), + ('\\\\ser\x00ver\\sha\x00re', '\\di\x00r')) + self.assertEqual(splitdrive(b'\\\\ser\x00ver\\sha\x00re\\di\x00r'), + (b'\\\\ser\x00ver\\sha\x00re', b'\\di\x00r')) + self.assertEqual(splitdrive("\\\\\udfff\\\udffe\\\udffd"), + ('\\\\\udfff\\\udffe', '\\\udffd')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\\xff\\share\\dir') + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\server\\\xff\\dir') + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\server\\share\\\xff') + else: + self.assertEqual(splitdrive(b'\\\\\xff\\\xfe\\\xfd'), + (b'\\\\\xff\\\xfe', b'\\\xfd')) + def test_splitroot(self): tester("ntpath.splitroot('')", ('', '', '')) tester("ntpath.splitroot('foo')", ('', '', 'foo')) @@ -214,6 +230,22 @@ def test_splitroot(self): tester('ntpath.splitroot(" :/foo")', (" :", "/", "foo")) tester('ntpath.splitroot("/:/foo")', ("", "/", ":/foo")) + def test_splitroot_invalid_paths(self): + splitroot = ntpath.splitroot + self.assertEqual(splitroot('\\\\ser\x00ver\\sha\x00re\\di\x00r'), + ('\\\\ser\x00ver\\sha\x00re', '\\', 'di\x00r')) + self.assertEqual(splitroot(b'\\\\ser\x00ver\\sha\x00re\\di\x00r'), + (b'\\\\ser\x00ver\\sha\x00re', b'\\', b'di\x00r')) + self.assertEqual(splitroot("\\\\\udfff\\\udffe\\\udffd"), + ('\\\\\udfff\\\udffe', '\\', '\udffd')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\\xff\\share\\dir') + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\server\\\xff\\dir') + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\server\\share\\\xff') + else: + self.assertEqual(splitroot(b'\\\\\xff\\\xfe\\\xfd'), + (b'\\\\\xff\\\xfe', b'\\', b'\xfd')) + def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")', @@ -226,6 +258,21 @@ def test_split(self): tester('ntpath.split("c:/")', ('c:/', '')) tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint/', '')) + def test_split_invalid_paths(self): + split = ntpath.split + self.assertEqual(split('c:\\fo\x00o\\ba\x00r'), + ('c:\\fo\x00o', 'ba\x00r')) + self.assertEqual(split(b'c:\\fo\x00o\\ba\x00r'), + (b'c:\\fo\x00o', b'ba\x00r')) + self.assertEqual(split('c:\\\udfff\\\udffe'), + ('c:\\\udfff', '\udffe')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, split, b'c:\\\xff\\bar') + self.assertRaises(UnicodeDecodeError, split, b'c:\\foo\\\xff') + else: + self.assertEqual(split(b'c:\\\xff\\\xfe'), + (b'c:\\\xff', b'\xfe')) + def test_isabs(self): tester('ntpath.isabs("foo\\bar")', 0) tester('ntpath.isabs("foo/bar")', 0) @@ -333,6 +380,30 @@ def test_join(self): tester("ntpath.join('D:a', './c:b')", 'D:a\\.\\c:b') tester("ntpath.join('D:/a', './c:b')", 'D:\\a\\.\\c:b') + def test_normcase(self): + normcase = ntpath.normcase + self.assertEqual(normcase(''), '') + self.assertEqual(normcase(b''), b'') + self.assertEqual(normcase('ABC'), 'abc') + self.assertEqual(normcase(b'ABC'), b'abc') + self.assertEqual(normcase('\xc4\u0141\u03a8'), '\xe4\u0142\u03c8') + expected = '\u03c9\u2126' if sys.platform == 'win32' else '\u03c9\u03c9' + self.assertEqual(normcase('\u03a9\u2126'), expected) + if sys.platform == 'win32' or sys.getfilesystemencoding() == 'utf-8': + self.assertEqual(normcase('\xc4\u0141\u03a8'.encode()), + '\xe4\u0142\u03c8'.encode()) + self.assertEqual(normcase('\u03a9\u2126'.encode()), + expected.encode()) + + def test_normcase_invalid_paths(self): + normcase = ntpath.normcase + self.assertEqual(normcase('abc\x00def'), 'abc\x00def') + self.assertEqual(normcase(b'abc\x00def'), b'abc\x00def') + self.assertEqual(normcase('\udfff'), '\udfff') + if sys.platform == 'win32': + path = b'ABC' + bytes(range(128, 256)) + self.assertEqual(normcase(path), path.lower()) + def test_normpath(self): tester("ntpath.normpath('A//////././//.//B')", r'A\B') tester("ntpath.normpath('A/./B')", r'A\B') @@ -381,6 +452,21 @@ def test_normpath(self): tester("ntpath.normpath('\\\\')", '\\\\') tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') + def test_normpath_invalid_paths(self): + normpath = ntpath.normpath + self.assertEqual(normpath('fo\x00o'), 'fo\x00o') + self.assertEqual(normpath(b'fo\x00o'), b'fo\x00o') + self.assertEqual(normpath('fo\x00o\\..\\bar'), 'bar') + self.assertEqual(normpath(b'fo\x00o\\..\\bar'), b'bar') + self.assertEqual(normpath('\udfff'), '\udfff') + self.assertEqual(normpath('\udfff\\..\\foo'), 'foo') + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, normpath, b'\xff') + self.assertRaises(UnicodeDecodeError, normpath, b'\xff\\..\\foo') + else: + self.assertEqual(normpath(b'\xff'), b'\xff') + self.assertEqual(normpath(b'\xff\\..\\foo'), b'foo') + def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) tester("ntpath.realpath('.')", expected) @@ -420,10 +506,6 @@ def test_realpath_basic(self): d = drives.pop().encode() self.assertEqual(ntpath.realpath(d), d) - # gh-106242: Embedded nulls and non-strict fallback to abspath - self.assertEqual(ABSTFN + "\0spam", - ntpath.realpath(os_helper.TESTFN + "\0spam", strict=False)) - @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_strict(self): @@ -434,8 +516,51 @@ def test_realpath_strict(self): self.addCleanup(os_helper.unlink, ABSTFN) self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True) self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True) + + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_invalid_paths(self): + realpath = ntpath.realpath + ABSTFN = ntpath.abspath(os_helper.TESTFN) + ABSTFNb = os.fsencode(ABSTFN) + path = ABSTFN + '\x00' + # gh-106242: Embedded nulls and non-strict fallback to abspath + self.assertEqual(realpath(path, strict=False), path) # gh-106242: Embedded nulls should raise OSError (not ValueError) - self.assertRaises(OSError, ntpath.realpath, ABSTFN + "\0spam", strict=True) + self.assertRaises(OSError, realpath, path, strict=True) + path = ABSTFNb + b'\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + path = ABSTFN + '\\nonexistent\\x\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + path = ABSTFNb + b'\\nonexistent\\x\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + path = ABSTFN + '\x00\\..' + self.assertEqual(realpath(path, strict=False), os.getcwd()) + self.assertEqual(realpath(path, strict=True), os.getcwd()) + path = ABSTFNb + b'\x00\\..' + self.assertEqual(realpath(path, strict=False), os.getcwdb()) + self.assertEqual(realpath(path, strict=True), os.getcwdb()) + path = ABSTFN + '\\nonexistent\\x\x00\\..' + self.assertEqual(realpath(path, strict=False), ABSTFN + '\\nonexistent') + self.assertRaises(OSError, realpath, path, strict=True) + path = ABSTFNb + b'\\nonexistent\\x\x00\\..' + self.assertEqual(realpath(path, strict=False), ABSTFNb + b'\\nonexistent') + self.assertRaises(OSError, realpath, path, strict=True) + + path = ABSTFNb + b'\xff' + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + path = ABSTFNb + b'\\nonexistent\\\xff' + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + path = ABSTFNb + b'\xff\\..' + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + path = ABSTFNb + b'\\nonexistent\\\xff\\..' + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @@ -812,8 +937,6 @@ def test_abspath(self): tester('ntpath.abspath("C:/nul")', "\\\\.\\nul") tester('ntpath.abspath("C:\\nul")', "\\\\.\\nul") self.assertTrue(ntpath.isabs(ntpath.abspath("C:spam"))) - self.assertEqual(ntpath.abspath("C:\x00"), ntpath.join(ntpath.abspath("C:"), "\x00")) - self.assertEqual(ntpath.abspath("\x00:spam"), "\x00:\\spam") tester('ntpath.abspath("//..")', "\\\\") tester('ntpath.abspath("//../")', "\\\\..\\") tester('ntpath.abspath("//../..")', "\\\\..\\") @@ -847,6 +970,26 @@ def test_abspath(self): drive, _ = ntpath.splitdrive(cwd_dir) tester('ntpath.abspath("/abc/")', drive + "\\abc") + def test_abspath_invalid_paths(self): + abspath = ntpath.abspath + if sys.platform == 'win32': + self.assertEqual(abspath("C:\x00"), ntpath.join(abspath("C:"), "\x00")) + self.assertEqual(abspath(b"C:\x00"), ntpath.join(abspath(b"C:"), b"\x00")) + self.assertEqual(abspath("\x00:spam"), "\x00:\\spam") + self.assertEqual(abspath(b"\x00:spam"), b"\x00:\\spam") + self.assertEqual(abspath('c:\\fo\x00o'), 'c:\\fo\x00o') + self.assertEqual(abspath(b'c:\\fo\x00o'), b'c:\\fo\x00o') + self.assertEqual(abspath('c:\\fo\x00o\\..\\bar'), 'c:\\bar') + self.assertEqual(abspath(b'c:\\fo\x00o\\..\\bar'), b'c:\\bar') + self.assertEqual(abspath('c:\\\udfff'), 'c:\\\udfff') + self.assertEqual(abspath('c:\\\udfff\\..\\foo'), 'c:\\foo') + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, abspath, b'c:\\\xff') + self.assertRaises(UnicodeDecodeError, abspath, b'c:\\\xff\\..\\foo') + else: + self.assertEqual(abspath(b'c:\\\xff'), b'c:\\\xff') + self.assertEqual(abspath(b'c:\\\xff\\..\\foo'), b'c:\\foo') + def test_relpath(self): tester('ntpath.relpath("a")', 'a') tester('ntpath.relpath(ntpath.abspath("a"))', 'a') @@ -989,6 +1132,18 @@ def test_ismount(self): self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$")) self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$\\")) + def test_ismount_invalid_paths(self): + ismount = ntpath.ismount + self.assertFalse(ismount("c:\\\udfff")) + if sys.platform == 'win32': + self.assertRaises(ValueError, ismount, "c:\\\x00") + self.assertRaises(ValueError, ismount, b"c:\\\x00") + self.assertRaises(UnicodeDecodeError, ismount, b"c:\\\xff") + else: + self.assertFalse(ismount("c:\\\x00")) + self.assertFalse(ismount(b"c:\\\x00")) + self.assertFalse(ismount(b"c:\\\xff")) + def test_isreserved(self): self.assertFalse(ntpath.isreserved('')) self.assertFalse(ntpath.isreserved('.')) @@ -1095,6 +1250,13 @@ def test_isjunction(self): self.assertFalse(ntpath.isjunction('tmpdir')) self.assertPathEqual(ntpath.realpath('testjunc'), ntpath.realpath('tmpdir')) + def test_isfile_invalid_paths(self): + isfile = ntpath.isfile + self.assertIs(isfile('/tmp\udfffabcds'), False) + self.assertIs(isfile(b'/tmp\xffabcds'), False) + self.assertIs(isfile('/tmp\x00abcds'), False) + self.assertIs(isfile(b'/tmp\x00abcds'), False) + @unittest.skipIf(sys.platform != 'win32', "drive letters are a windows concept") def test_isfile_driveletter(self): drive = os.environ.get('SystemDrive') @@ -1195,9 +1357,6 @@ def _check_function(self, func): def test_path_normcase(self): self._check_function(self.path.normcase) - if sys.platform == 'win32': - self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') - self.assertEqual(ntpath.normcase('abc\x00def'), 'abc\x00def') def test_path_isabs(self): self._check_function(self.path.isabs) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index fa19d549c26cfa..c70688ff5e9bec 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -229,6 +229,7 @@ def test_ismount_non_existent(self): finally: safe_rmdir(ABSTFN) + def test_ismount_invalid_paths(self): self.assertIs(posixpath.ismount('/\udfff'), False) self.assertIs(posixpath.ismount(b'/\xff'), False) self.assertIs(posixpath.ismount('/\x00'), False) @@ -489,6 +490,79 @@ def test_realpath_strict(self): finally: os_helper.unlink(ABSTFN) + def test_realpath_invalid_paths(self): + path = '/\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + path = b'/\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + path = '/nonexistent/x\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = b'/nonexistent/x\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = '/\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + path = b'/\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + path = '/nonexistent/x\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = b'/nonexistent/x\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + + path = '/\udfff' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + path = '/nonexistent/\udfff' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), path) + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = '/\udfff/..' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), '/') + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + path = '/nonexistent/\udfff/..' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), '/nonexistent') + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + + path = b'/\xff' + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + else: + self.assertEqual(realpath(path, strict=False), path) + if support.is_wasi: + self.assertRaises(OSError, realpath, path, strict=True) + else: + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = b'/nonexistent/\xff' + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + else: + self.assertEqual(realpath(path, strict=False), path) + if support.is_wasi: + self.assertRaises(OSError, realpath, path, strict=True) + else: + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash def test_realpath_relative(self): From 27bd08273ce822a4dbe0e73cca47441e99fd6f0d Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Mon, 19 May 2025 14:22:05 -0400 Subject: [PATCH 213/989] Revert "gh-128639: Don't assume one thread in subinterpreter finalization (gh-128640)" (gh-134256) This reverts commit 9859791f9e116c827468f307ac0770286c975c8b. The original change broke the iOS and android buildbots, where the tests are run single-process. --- Lib/test/test_interpreters/test_api.py | 60 +------------------ Lib/test/test_interpreters/test_lifecycle.py | 6 +- Lib/test/test_threading.py | 5 +- ...-01-08-12-52-47.gh-issue-128640.9nbh9z.rst | 1 - Programs/_testembed.c | 9 +-- Python/pylifecycle.c | 56 ++++++++--------- 6 files changed, 38 insertions(+), 99 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index c7ee114fe0838c..1e2d572b1cbb81 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -647,59 +647,6 @@ def test_created_with_capi(self): self.interp_exists(interpid)) - def test_remaining_threads(self): - r_interp, w_interp = self.pipe() - - FINISHED = b'F' - - # It's unlikely, but technically speaking, it's possible - # that the thread could've finished before interp.close() is - # reached, so this test might not properly exercise the case. - # However, it's quite unlikely and I'm too lazy to deal with it. - interp = interpreters.create() - interp.exec(f"""if True: - import os - import threading - import time - - def task(): - time.sleep(1) - os.write({w_interp}, {FINISHED!r}) - - threads = [threading.Thread(target=task) for _ in range(3)] - for t in threads: - t.start() - """) - interp.close() - - self.assertEqual(os.read(r_interp, 1), FINISHED) - - def test_remaining_daemon_threads(self): - interp = _interpreters.create( - types.SimpleNamespace( - use_main_obmalloc=False, - allow_fork=False, - allow_exec=False, - allow_threads=True, - allow_daemon_threads=True, - check_multi_interp_extensions=True, - gil='own', - ) - ) - _interpreters.exec(interp, f"""if True: - import threading - import time - - def task(): - time.sleep(100) - - threads = [threading.Thread(target=task, daemon=True) for _ in range(3)] - for t in threads: - t.start() - """) - _interpreters.destroy(interp) - - class TestInterpreterPrepareMain(TestBase): def test_empty(self): @@ -808,10 +755,7 @@ def script(): spam.eggs() interp = interpreters.create() - try: - interp.exec(script) - finally: - interp.close() + interp.exec(script) """) stdout, stderr = self.assert_python_failure(scriptfile) @@ -820,7 +764,7 @@ def script(): # File "{interpreters.__file__}", line 179, in exec self.assertEqual(stderr, dedent(f"""\ Traceback (most recent call last): - File "{scriptfile}", line 10, in + File "{scriptfile}", line 9, in interp.exec(script) ~~~~~~~~~~~^^^^^^^^ {interpmod_line.strip()} diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index 3f9ed1fb501522..ac24f6568acd95 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -132,7 +132,6 @@ def test_sys_path_0(self): 'sub': sys.path[0], }}, indent=4), flush=True) """) - interp.close() ''' # / # pkg/ @@ -173,10 +172,7 @@ def test_gh_109793(self): argv = [sys.executable, '-c', '''if True: from test.support import interpreters interp = interpreters.create() - try: - raise Exception - finally: - interp.close() + raise Exception '''] proc = subprocess.run(argv, capture_output=True, text=True) self.assertIn('Traceback', proc.stderr) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 127ef658673612..b6e2d419019aa1 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1689,7 +1689,10 @@ def f(): _testcapi.run_in_subinterp(%r) """ % (subinterp_code,) - assert_python_ok("-c", script) + with test.support.SuppressCrashReport(): + rc, out, err = assert_python_failure("-c", script) + self.assertIn("Fatal Python error: Py_EndInterpreter: " + "not the last thread", err.decode()) def _check_allowed(self, before_start='', *, allowed=True, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst deleted file mode 100644 index 040c6d56c47244..00000000000000 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-08-12-52-47.gh-issue-128640.9nbh9z.rst +++ /dev/null @@ -1 +0,0 @@ -Fix a crash when using threads inside of a subinterpreter. diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 0a19d1126e246e..8e0e330f6605c9 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1395,12 +1395,9 @@ static int test_audit_subinterpreter(void) PySys_AddAuditHook(_audit_subinterpreter_hook, NULL); _testembed_initialize(); - PyThreadState *tstate = PyThreadState_Get(); - for (int i = 0; i < 3; ++i) - { - Py_EndInterpreter(Py_NewInterpreter()); - PyThreadState_Swap(tstate); - } + Py_NewInterpreter(); + Py_NewInterpreter(); + Py_NewInterpreter(); Py_Finalize(); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b6bc2ea5211460..8394245d373030 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1992,7 +1992,6 @@ resolve_final_tstate(_PyRuntimeState *runtime) } else { /* Fall back to the current tstate. It's better than nothing. */ - // XXX No it's not main_tstate = tstate; } } @@ -2038,16 +2037,6 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyAtExit_Call(tstate->interp); - /* Clean up any lingering subinterpreters. - - Two preconditions need to be met here: - - - This has to happen before _PyRuntimeState_SetFinalizing is - called, or else threads might get prematurely blocked. - - The world must not be stopped, as finalizers can run. - */ - finalize_subinterpreters(); - assert(_PyThreadState_GET() == tstate); /* Copy the core config, PyInterpreterState_Delete() free @@ -2135,6 +2124,9 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyImport_FiniExternal(tstate->interp); finalize_modules(tstate); + /* Clean up any lingering subinterpreters. */ + finalize_subinterpreters(); + /* Print debug stats if any */ _PyEval_Fini(); @@ -2418,8 +2410,9 @@ Py_NewInterpreter(void) return tstate; } -/* Delete an interpreter. This requires that the given thread state - is current, and that the thread has no remaining frames. +/* Delete an interpreter and its last thread. This requires that the + given thread state is current, that the thread has no remaining + frames, and that it is its interpreter's only remaining thread. It is a fatal error to violate these constraints. (Py_FinalizeEx() doesn't have these constraints -- it zaps @@ -2449,15 +2442,14 @@ Py_EndInterpreter(PyThreadState *tstate) _Py_FinishPendingCalls(tstate); _PyAtExit_Call(tstate->interp); - _PyRuntimeState *runtime = interp->runtime; - _PyEval_StopTheWorldAll(runtime); - PyThreadState *list = _PyThreadState_RemoveExcept(tstate); + + if (tstate != interp->threads.head || tstate->next != NULL) { + Py_FatalError("not the last thread"); + } /* Remaining daemon threads will automatically exit when they attempt to take the GIL (ex: PyEval_RestoreThread()). */ _PyInterpreterState_SetFinalizing(interp, tstate); - _PyEval_StartTheWorldAll(runtime); - _PyThreadState_DeleteList(list, /*is_after_fork=*/0); // XXX Call something like _PyImport_Disable() here? @@ -2488,8 +2480,6 @@ finalize_subinterpreters(void) PyInterpreterState *main_interp = _PyInterpreterState_Main(); assert(final_tstate->interp == main_interp); _PyRuntimeState *runtime = main_interp->runtime; - assert(!runtime->stoptheworld.world_stopped); - assert(_PyRuntimeState_GetFinalizing(runtime) == NULL); struct pyinterpreters *interpreters = &runtime->interpreters; /* Get the first interpreter in the list. */ @@ -2518,17 +2508,27 @@ finalize_subinterpreters(void) /* Clean up all remaining subinterpreters. */ while (interp != NULL) { - /* Make a tstate for finalization. */ - PyThreadState *tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); - if (tstate == NULL) { - // XXX Some graceful way to always get a thread state? - Py_FatalError("thread state allocation failed"); + assert(!_PyInterpreterState_IsRunningMain(interp)); + + /* Find the tstate to use for fini. We assume the interpreter + will have at most one tstate at this point. */ + PyThreadState *tstate = interp->threads.head; + if (tstate != NULL) { + /* Ideally we would be able to use tstate as-is, and rely + on it being in a ready state: no exception set, not + running anything (tstate->current_frame), matching the + current thread ID (tstate->thread_id). To play it safe, + we always delete it and use a fresh tstate instead. */ + assert(tstate != final_tstate); + _PyThreadState_Attach(tstate); + PyThreadState_Clear(tstate); + _PyThreadState_Detach(tstate); + PyThreadState_Delete(tstate); } - - /* Enter the subinterpreter. */ - _PyThreadState_Attach(tstate); + tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); /* Destroy the subinterpreter. */ + _PyThreadState_Attach(tstate); Py_EndInterpreter(tstate); assert(_PyThreadState_GET() == NULL); From 92f85ff3a07335e32a0e00a55b7b6aaf3657019b Mon Sep 17 00:00:00 2001 From: Sahil Shah Date: Mon, 19 May 2025 15:28:09 -0400 Subject: [PATCH 214/989] gh-80184: Set getattr(socket, "SOMAXCONN", 5) as the default queue size for TCPServer (GH-134249) socketserver.TCPServer default queue size becomes SOMAXCONN instead of 5 when possible. --- Doc/library/socketserver.rst | 2 ++ Lib/socketserver.py | 2 +- .../next/Library/2025-05-19-17-27-21.gh-issue-80184.LOkbaw.rst | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-17-27-21.gh-issue-80184.LOkbaw.rst diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 59cfa136a3b7da..753f12460b824b 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -24,6 +24,8 @@ There are four basic concrete server classes: :meth:`~BaseServer.server_activate`. The other parameters are passed to the :class:`BaseServer` base class. + .. versionchanged:: next + The default queue size is now ``socket.SOMAXCONN`` for :class:`socketserver.TCPServer`. .. class:: UDPServer(server_address, RequestHandlerClass, bind_and_activate=True) diff --git a/Lib/socketserver.py b/Lib/socketserver.py index 35b2723de3babe..93b0a23be27f68 100644 --- a/Lib/socketserver.py +++ b/Lib/socketserver.py @@ -441,7 +441,7 @@ class TCPServer(BaseServer): socket_type = socket.SOCK_STREAM - request_queue_size = 5 + request_queue_size = getattr(socket, "SOMAXCONN", 5) allow_reuse_address = False diff --git a/Misc/NEWS.d/next/Library/2025-05-19-17-27-21.gh-issue-80184.LOkbaw.rst b/Misc/NEWS.d/next/Library/2025-05-19-17-27-21.gh-issue-80184.LOkbaw.rst new file mode 100644 index 00000000000000..089268dc4c3e7b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-17-27-21.gh-issue-80184.LOkbaw.rst @@ -0,0 +1 @@ +The default queue size is now ``socket.SOMAXCONN`` for :class:`socketserver.TCPServer`. From 42d03f393313d8a228a45dad1d0897ea99f5ec89 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Mon, 19 May 2025 15:48:55 -0400 Subject: [PATCH 215/989] GH-131798: Split CALL_LIST_APPEND into several uops (GH-134240) --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 392 +++++++++--------- Include/internal/pycore_uop_metadata.h | 8 + Lib/test/test_capi/test_opt.py | 15 + ...-05-19-15-15-58.gh-issue-131798.PCP71j.rst | 1 + Python/bytecodes.c | 27 +- Python/executor_cases.c.h | 35 +- Python/generated_cases.c.h | 113 +++-- Python/optimizer_bytecodes.c | 15 + Python/optimizer_cases.c.h | 21 + 10 files changed, 371 insertions(+), 260 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-15-15-58.gh-issue-131798.PCP71j.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 17811d301a92c3..33e9804e59379b 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1121,7 +1121,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1368,7 +1368,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_KW_NON_PY] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE_KW, OPARG_SIMPLE, 3 }, { _CALL_KW_NON_PY, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_KW_PY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION_KW, 2, 1 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_LEN] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_LEN, OPARG_SIMPLE, 3 }, { _CALL_LEN, OPARG_SIMPLE, 3 } } }, - [CALL_LIST_APPEND] = { .nuops = 1, .uops = { { _CALL_LIST_APPEND, OPARG_SIMPLE, 3 } } }, + [CALL_LIST_APPEND] = { .nuops = 4, .uops = { { _GUARD_CALLABLE_LIST_APPEND, OPARG_SIMPLE, 3 }, { _GUARD_NOS_NOT_NULL, OPARG_SIMPLE, 3 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 3 }, { _CALL_LIST_APPEND, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_FAST] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_NOARGS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 71a288a3a39179..481ec9307db3b5 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -46,124 +46,126 @@ extern "C" { #define _CALL_ISINSTANCE 324 #define _CALL_KW_NON_PY 325 #define _CALL_LEN 326 -#define _CALL_LIST_APPEND CALL_LIST_APPEND -#define _CALL_METHOD_DESCRIPTOR_FAST 327 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 328 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 329 -#define _CALL_METHOD_DESCRIPTOR_O 330 -#define _CALL_NON_PY_GENERAL 331 -#define _CALL_STR_1 332 -#define _CALL_TUPLE_1 333 -#define _CALL_TYPE_1 334 -#define _CHECK_AND_ALLOCATE_OBJECT 335 -#define _CHECK_ATTR_CLASS 336 -#define _CHECK_ATTR_METHOD_LAZY_DICT 337 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 338 +#define _CALL_LIST_APPEND 327 +#define _CALL_METHOD_DESCRIPTOR_FAST 328 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 329 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 330 +#define _CALL_METHOD_DESCRIPTOR_O 331 +#define _CALL_NON_PY_GENERAL 332 +#define _CALL_STR_1 333 +#define _CALL_TUPLE_1 334 +#define _CALL_TYPE_1 335 +#define _CHECK_AND_ALLOCATE_OBJECT 336 +#define _CHECK_ATTR_CLASS 337 +#define _CHECK_ATTR_METHOD_LAZY_DICT 338 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 339 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 339 -#define _CHECK_FUNCTION_EXACT_ARGS 340 -#define _CHECK_FUNCTION_VERSION 341 -#define _CHECK_FUNCTION_VERSION_INLINE 342 -#define _CHECK_FUNCTION_VERSION_KW 343 -#define _CHECK_IS_NOT_PY_CALLABLE 344 -#define _CHECK_IS_NOT_PY_CALLABLE_KW 345 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 346 -#define _CHECK_METHOD_VERSION 347 -#define _CHECK_METHOD_VERSION_KW 348 -#define _CHECK_PEP_523 349 -#define _CHECK_PERIODIC 350 -#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 351 -#define _CHECK_RECURSION_REMAINING 352 -#define _CHECK_STACK_SPACE 353 -#define _CHECK_STACK_SPACE_OPERAND 354 -#define _CHECK_VALIDITY 355 -#define _COMPARE_OP 356 -#define _COMPARE_OP_FLOAT 357 -#define _COMPARE_OP_INT 358 -#define _COMPARE_OP_STR 359 -#define _CONTAINS_OP 360 -#define _CONTAINS_OP_DICT 361 -#define _CONTAINS_OP_SET 362 +#define _CHECK_FUNCTION 340 +#define _CHECK_FUNCTION_EXACT_ARGS 341 +#define _CHECK_FUNCTION_VERSION 342 +#define _CHECK_FUNCTION_VERSION_INLINE 343 +#define _CHECK_FUNCTION_VERSION_KW 344 +#define _CHECK_IS_NOT_PY_CALLABLE 345 +#define _CHECK_IS_NOT_PY_CALLABLE_KW 346 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 347 +#define _CHECK_METHOD_VERSION 348 +#define _CHECK_METHOD_VERSION_KW 349 +#define _CHECK_PEP_523 350 +#define _CHECK_PERIODIC 351 +#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 352 +#define _CHECK_RECURSION_REMAINING 353 +#define _CHECK_STACK_SPACE 354 +#define _CHECK_STACK_SPACE_OPERAND 355 +#define _CHECK_VALIDITY 356 +#define _COMPARE_OP 357 +#define _COMPARE_OP_FLOAT 358 +#define _COMPARE_OP_INT 359 +#define _COMPARE_OP_STR 360 +#define _CONTAINS_OP 361 +#define _CONTAINS_OP_DICT 362 +#define _CONTAINS_OP_SET 363 #define _CONVERT_VALUE CONVERT_VALUE #define _COPY COPY #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 363 +#define _CREATE_INIT_FRAME 364 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 364 +#define _DEOPT 365 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 365 -#define _DO_CALL_FUNCTION_EX 366 -#define _DO_CALL_KW 367 +#define _DO_CALL 366 +#define _DO_CALL_FUNCTION_EX 367 +#define _DO_CALL_KW 368 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 368 +#define _ERROR_POP_N 369 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 369 -#define _EXPAND_METHOD_KW 370 -#define _FATAL_ERROR 371 +#define _EXPAND_METHOD 370 +#define _EXPAND_METHOD_KW 371 +#define _FATAL_ERROR 372 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 372 -#define _FOR_ITER_GEN_FRAME 373 -#define _FOR_ITER_TIER_TWO 374 +#define _FOR_ITER 373 +#define _FOR_ITER_GEN_FRAME 374 +#define _FOR_ITER_TIER_TWO 375 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 375 -#define _GUARD_CALLABLE_ISINSTANCE 376 -#define _GUARD_CALLABLE_LEN 377 -#define _GUARD_CALLABLE_STR_1 378 -#define _GUARD_CALLABLE_TUPLE_1 379 -#define _GUARD_CALLABLE_TYPE_1 380 -#define _GUARD_DORV_NO_DICT 381 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 382 -#define _GUARD_GLOBALS_VERSION 383 -#define _GUARD_IS_FALSE_POP 384 -#define _GUARD_IS_NONE_POP 385 -#define _GUARD_IS_NOT_NONE_POP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_KEYS_VERSION 388 -#define _GUARD_NOS_DICT 389 -#define _GUARD_NOS_FLOAT 390 -#define _GUARD_NOS_INT 391 -#define _GUARD_NOS_LIST 392 -#define _GUARD_NOS_NULL 393 -#define _GUARD_NOS_TUPLE 394 -#define _GUARD_NOS_UNICODE 395 -#define _GUARD_NOT_EXHAUSTED_LIST 396 -#define _GUARD_NOT_EXHAUSTED_RANGE 397 -#define _GUARD_NOT_EXHAUSTED_TUPLE 398 -#define _GUARD_THIRD_NULL 399 -#define _GUARD_TOS_ANY_SET 400 -#define _GUARD_TOS_DICT 401 -#define _GUARD_TOS_FLOAT 402 -#define _GUARD_TOS_INT 403 -#define _GUARD_TOS_LIST 404 -#define _GUARD_TOS_SLICE 405 -#define _GUARD_TOS_TUPLE 406 -#define _GUARD_TOS_UNICODE 407 -#define _GUARD_TYPE_VERSION 408 -#define _GUARD_TYPE_VERSION_AND_LOCK 409 +#define _GUARD_BINARY_OP_EXTEND 376 +#define _GUARD_CALLABLE_ISINSTANCE 377 +#define _GUARD_CALLABLE_LEN 378 +#define _GUARD_CALLABLE_LIST_APPEND 379 +#define _GUARD_CALLABLE_STR_1 380 +#define _GUARD_CALLABLE_TUPLE_1 381 +#define _GUARD_CALLABLE_TYPE_1 382 +#define _GUARD_DORV_NO_DICT 383 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384 +#define _GUARD_GLOBALS_VERSION 385 +#define _GUARD_IS_FALSE_POP 386 +#define _GUARD_IS_NONE_POP 387 +#define _GUARD_IS_NOT_NONE_POP 388 +#define _GUARD_IS_TRUE_POP 389 +#define _GUARD_KEYS_VERSION 390 +#define _GUARD_NOS_DICT 391 +#define _GUARD_NOS_FLOAT 392 +#define _GUARD_NOS_INT 393 +#define _GUARD_NOS_LIST 394 +#define _GUARD_NOS_NOT_NULL 395 +#define _GUARD_NOS_NULL 396 +#define _GUARD_NOS_TUPLE 397 +#define _GUARD_NOS_UNICODE 398 +#define _GUARD_NOT_EXHAUSTED_LIST 399 +#define _GUARD_NOT_EXHAUSTED_RANGE 400 +#define _GUARD_NOT_EXHAUSTED_TUPLE 401 +#define _GUARD_THIRD_NULL 402 +#define _GUARD_TOS_ANY_SET 403 +#define _GUARD_TOS_DICT 404 +#define _GUARD_TOS_FLOAT 405 +#define _GUARD_TOS_INT 406 +#define _GUARD_TOS_LIST 407 +#define _GUARD_TOS_SLICE 408 +#define _GUARD_TOS_TUPLE 409 +#define _GUARD_TOS_UNICODE 410 +#define _GUARD_TYPE_VERSION 411 +#define _GUARD_TYPE_VERSION_AND_LOCK 412 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 410 -#define _INIT_CALL_PY_EXACT_ARGS 411 -#define _INIT_CALL_PY_EXACT_ARGS_0 412 -#define _INIT_CALL_PY_EXACT_ARGS_1 413 -#define _INIT_CALL_PY_EXACT_ARGS_2 414 -#define _INIT_CALL_PY_EXACT_ARGS_3 415 -#define _INIT_CALL_PY_EXACT_ARGS_4 416 -#define _INSERT_NULL 417 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413 +#define _INIT_CALL_PY_EXACT_ARGS 414 +#define _INIT_CALL_PY_EXACT_ARGS_0 415 +#define _INIT_CALL_PY_EXACT_ARGS_1 416 +#define _INIT_CALL_PY_EXACT_ARGS_2 417 +#define _INIT_CALL_PY_EXACT_ARGS_3 418 +#define _INIT_CALL_PY_EXACT_ARGS_4 419 +#define _INSERT_NULL 420 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -173,163 +175,163 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 418 +#define _IS_NONE 421 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 419 -#define _ITER_CHECK_RANGE 420 -#define _ITER_CHECK_TUPLE 421 -#define _ITER_JUMP_LIST 422 -#define _ITER_JUMP_RANGE 423 -#define _ITER_JUMP_TUPLE 424 -#define _ITER_NEXT_LIST 425 -#define _ITER_NEXT_LIST_TIER_TWO 426 -#define _ITER_NEXT_RANGE 427 -#define _ITER_NEXT_TUPLE 428 -#define _JUMP_TO_TOP 429 +#define _ITER_CHECK_LIST 422 +#define _ITER_CHECK_RANGE 423 +#define _ITER_CHECK_TUPLE 424 +#define _ITER_JUMP_LIST 425 +#define _ITER_JUMP_RANGE 426 +#define _ITER_JUMP_TUPLE 427 +#define _ITER_NEXT_LIST 428 +#define _ITER_NEXT_LIST_TIER_TWO 429 +#define _ITER_NEXT_RANGE 430 +#define _ITER_NEXT_TUPLE 431 +#define _JUMP_TO_TOP 432 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 430 -#define _LOAD_ATTR_CLASS 431 +#define _LOAD_ATTR 433 +#define _LOAD_ATTR_CLASS 434 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 432 -#define _LOAD_ATTR_METHOD_LAZY_DICT 433 -#define _LOAD_ATTR_METHOD_NO_DICT 434 -#define _LOAD_ATTR_METHOD_WITH_VALUES 435 -#define _LOAD_ATTR_MODULE 436 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 437 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 438 -#define _LOAD_ATTR_PROPERTY_FRAME 439 -#define _LOAD_ATTR_SLOT 440 -#define _LOAD_ATTR_WITH_HINT 441 +#define _LOAD_ATTR_INSTANCE_VALUE 435 +#define _LOAD_ATTR_METHOD_LAZY_DICT 436 +#define _LOAD_ATTR_METHOD_NO_DICT 437 +#define _LOAD_ATTR_METHOD_WITH_VALUES 438 +#define _LOAD_ATTR_MODULE 439 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441 +#define _LOAD_ATTR_PROPERTY_FRAME 442 +#define _LOAD_ATTR_SLOT 443 +#define _LOAD_ATTR_WITH_HINT 444 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 442 +#define _LOAD_BYTECODE 445 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 443 -#define _LOAD_CONST_INLINE_BORROW 444 +#define _LOAD_CONST_INLINE 446 +#define _LOAD_CONST_INLINE_BORROW 447 #define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 445 -#define _LOAD_FAST_0 446 -#define _LOAD_FAST_1 447 -#define _LOAD_FAST_2 448 -#define _LOAD_FAST_3 449 -#define _LOAD_FAST_4 450 -#define _LOAD_FAST_5 451 -#define _LOAD_FAST_6 452 -#define _LOAD_FAST_7 453 +#define _LOAD_FAST 448 +#define _LOAD_FAST_0 449 +#define _LOAD_FAST_1 450 +#define _LOAD_FAST_2 451 +#define _LOAD_FAST_3 452 +#define _LOAD_FAST_4 453 +#define _LOAD_FAST_5 454 +#define _LOAD_FAST_6 455 +#define _LOAD_FAST_7 456 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 454 -#define _LOAD_FAST_BORROW_0 455 -#define _LOAD_FAST_BORROW_1 456 -#define _LOAD_FAST_BORROW_2 457 -#define _LOAD_FAST_BORROW_3 458 -#define _LOAD_FAST_BORROW_4 459 -#define _LOAD_FAST_BORROW_5 460 -#define _LOAD_FAST_BORROW_6 461 -#define _LOAD_FAST_BORROW_7 462 +#define _LOAD_FAST_BORROW 457 +#define _LOAD_FAST_BORROW_0 458 +#define _LOAD_FAST_BORROW_1 459 +#define _LOAD_FAST_BORROW_2 460 +#define _LOAD_FAST_BORROW_3 461 +#define _LOAD_FAST_BORROW_4 462 +#define _LOAD_FAST_BORROW_5 463 +#define _LOAD_FAST_BORROW_6 464 +#define _LOAD_FAST_BORROW_7 465 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 463 -#define _LOAD_GLOBAL_BUILTINS 464 -#define _LOAD_GLOBAL_MODULE 465 +#define _LOAD_GLOBAL 466 +#define _LOAD_GLOBAL_BUILTINS 467 +#define _LOAD_GLOBAL_MODULE 468 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 466 -#define _LOAD_SMALL_INT_0 467 -#define _LOAD_SMALL_INT_1 468 -#define _LOAD_SMALL_INT_2 469 -#define _LOAD_SMALL_INT_3 470 -#define _LOAD_SPECIAL 471 +#define _LOAD_SMALL_INT 469 +#define _LOAD_SMALL_INT_0 470 +#define _LOAD_SMALL_INT_1 471 +#define _LOAD_SMALL_INT_2 472 +#define _LOAD_SMALL_INT_3 473 +#define _LOAD_SPECIAL 474 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 472 +#define _MAKE_CALLARGS_A_TUPLE 475 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 473 +#define _MAKE_WARM 476 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 474 -#define _MAYBE_EXPAND_METHOD_KW 475 -#define _MONITOR_CALL 476 -#define _MONITOR_CALL_KW 477 -#define _MONITOR_JUMP_BACKWARD 478 -#define _MONITOR_RESUME 479 +#define _MAYBE_EXPAND_METHOD 477 +#define _MAYBE_EXPAND_METHOD_KW 478 +#define _MONITOR_CALL 479 +#define _MONITOR_CALL_KW 480 +#define _MONITOR_JUMP_BACKWARD 481 +#define _MONITOR_RESUME 482 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 480 -#define _POP_JUMP_IF_TRUE 481 +#define _POP_JUMP_IF_FALSE 483 +#define _POP_JUMP_IF_TRUE 484 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 482 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 483 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 484 +#define _POP_TOP_LOAD_CONST_INLINE 485 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 486 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 487 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 485 +#define _PUSH_FRAME 488 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 486 -#define _PY_FRAME_GENERAL 487 -#define _PY_FRAME_KW 488 -#define _QUICKEN_RESUME 489 -#define _REPLACE_WITH_TRUE 490 +#define _PUSH_NULL_CONDITIONAL 489 +#define _PY_FRAME_GENERAL 490 +#define _PY_FRAME_KW 491 +#define _QUICKEN_RESUME 492 +#define _REPLACE_WITH_TRUE 493 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 491 -#define _SEND 492 -#define _SEND_GEN_FRAME 493 +#define _SAVE_RETURN_OFFSET 494 +#define _SEND 495 +#define _SEND_GEN_FRAME 496 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 494 -#define _STORE_ATTR 495 -#define _STORE_ATTR_INSTANCE_VALUE 496 -#define _STORE_ATTR_SLOT 497 -#define _STORE_ATTR_WITH_HINT 498 +#define _START_EXECUTOR 497 +#define _STORE_ATTR 498 +#define _STORE_ATTR_INSTANCE_VALUE 499 +#define _STORE_ATTR_SLOT 500 +#define _STORE_ATTR_WITH_HINT 501 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 499 -#define _STORE_FAST_0 500 -#define _STORE_FAST_1 501 -#define _STORE_FAST_2 502 -#define _STORE_FAST_3 503 -#define _STORE_FAST_4 504 -#define _STORE_FAST_5 505 -#define _STORE_FAST_6 506 -#define _STORE_FAST_7 507 +#define _STORE_FAST 502 +#define _STORE_FAST_0 503 +#define _STORE_FAST_1 504 +#define _STORE_FAST_2 505 +#define _STORE_FAST_3 506 +#define _STORE_FAST_4 507 +#define _STORE_FAST_5 508 +#define _STORE_FAST_6 509 +#define _STORE_FAST_7 510 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 508 -#define _STORE_SUBSCR 509 -#define _STORE_SUBSCR_DICT 510 -#define _STORE_SUBSCR_LIST_INT 511 +#define _STORE_SLICE 511 +#define _STORE_SUBSCR 512 +#define _STORE_SUBSCR_DICT 513 +#define _STORE_SUBSCR_LIST_INT 514 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 512 -#define _TO_BOOL 513 +#define _TIER2_RESUME_CHECK 515 +#define _TO_BOOL 516 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 514 +#define _TO_BOOL_LIST 517 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 515 +#define _TO_BOOL_STR 518 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 516 -#define _UNPACK_SEQUENCE_LIST 517 -#define _UNPACK_SEQUENCE_TUPLE 518 -#define _UNPACK_SEQUENCE_TWO_TUPLE 519 +#define _UNPACK_SEQUENCE 519 +#define _UNPACK_SEQUENCE_LIST 520 +#define _UNPACK_SEQUENCE_TUPLE 521 +#define _UNPACK_SEQUENCE_TWO_TUPLE 522 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 519 +#define MAX_UOP_ID 522 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 88a2e538447034..d90b074b409f88 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -247,6 +247,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = 0, [_GUARD_NOS_NULL] = HAS_DEOPT_FLAG, + [_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG, [_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG, [_GUARD_CALLABLE_TYPE_1] = HAS_DEOPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, @@ -265,6 +266,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_LEN] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_ISINSTANCE] = HAS_DEOPT_FLAG, [_CALL_ISINSTANCE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_LIST_APPEND] = HAS_DEOPT_FLAG, [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -429,6 +431,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND", [_GUARD_CALLABLE_ISINSTANCE] = "_GUARD_CALLABLE_ISINSTANCE", [_GUARD_CALLABLE_LEN] = "_GUARD_CALLABLE_LEN", + [_GUARD_CALLABLE_LIST_APPEND] = "_GUARD_CALLABLE_LIST_APPEND", [_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1", [_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1", [_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1", @@ -444,6 +447,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_NOS_FLOAT] = "_GUARD_NOS_FLOAT", [_GUARD_NOS_INT] = "_GUARD_NOS_INT", [_GUARD_NOS_LIST] = "_GUARD_NOS_LIST", + [_GUARD_NOS_NOT_NULL] = "_GUARD_NOS_NOT_NULL", [_GUARD_NOS_NULL] = "_GUARD_NOS_NULL", [_GUARD_NOS_TUPLE] = "_GUARD_NOS_TUPLE", [_GUARD_NOS_UNICODE] = "_GUARD_NOS_UNICODE", @@ -1072,6 +1076,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _GUARD_NOS_NULL: return 0; + case _GUARD_NOS_NOT_NULL: + return 0; case _GUARD_THIRD_NULL: return 0; case _GUARD_CALLABLE_TYPE_1: @@ -1108,6 +1114,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _CALL_ISINSTANCE: return 4; + case _GUARD_CALLABLE_LIST_APPEND: + return 0; case _CALL_LIST_APPEND: return 3; case _CALL_METHOD_DESCRIPTOR_O: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 73ce55400adcdf..305c53ffd816de 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1959,6 +1959,21 @@ def testfunc(n): self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) + def test_call_list_append(self): + def testfunc(n): + a = [] + for i in range(n): + a.append(i) + return sum(a) + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, sum(range(TIER2_THRESHOLD))) + uops = get_opnames(ex) + self.assertIn("_CALL_LIST_APPEND", uops) + # We should remove these in the future + self.assertIn("_GUARD_NOS_LIST", uops) + self.assertIn("_GUARD_CALLABLE_LIST_APPEND", uops) + def test_call_isinstance_is_true(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-15-15-58.gh-issue-131798.PCP71j.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-15-15-58.gh-issue-131798.PCP71j.rst new file mode 100644 index 00000000000000..c816a0afad4166 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-15-15-58.gh-issue-131798.PCP71j.rst @@ -0,0 +1 @@ +Split ``CALL_LIST_APPEND`` into several uops. Patch by Diego Russo. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 42e4f581894d1f..c10327916fc867 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4041,6 +4041,11 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); } + op(_GUARD_NOS_NOT_NULL, (nos, unused -- nos, unused)) { + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + EXIT_IF(o == NULL); + } + op(_GUARD_THIRD_NULL, (null, unused, unused -- null, unused, unused)) { DEOPT_IF(!PyStackRef_IsNull(null)); } @@ -4394,16 +4399,26 @@ dummy_func( _GUARD_CALLABLE_ISINSTANCE + _CALL_ISINSTANCE; + macro(CALL_LIST_APPEND) = + unused/1 + + unused/2 + + _GUARD_CALLABLE_LIST_APPEND + + _GUARD_NOS_NOT_NULL + + _GUARD_NOS_LIST + + _CALL_LIST_APPEND; + + op(_GUARD_CALLABLE_LIST_APPEND, (callable, unused, unused -- callable, unused, unused)){ + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + DEOPT_IF(callable_o != interp->callable_cache.list_append); + } + // This is secretly a super-instruction - inst(CALL_LIST_APPEND, (unused/1, unused/2, callable, self, arg -- )) { + op(_CALL_LIST_APPEND, (callable, self, arg -- )) { assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - DEOPT_IF(callable_o != interp->callable_cache.list_append); - DEOPT_IF(self_o == NULL); - DEOPT_IF(!PyList_Check(self_o)); + DEOPT_IF(!PyList_CheckExact(self_o)); DEOPT_IF(!LOCK_OBJECT(self_o)); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 41c9bd5ba7089a..c5e481932e7b29 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5276,6 +5276,17 @@ break; } + case _GUARD_NOS_NOT_NULL: { + _PyStackRef nos; + nos = stack_pointer[-2]; + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _GUARD_THIRD_NULL: { _PyStackRef null; null = stack_pointer[-3]; @@ -5920,6 +5931,18 @@ break; } + case _GUARD_CALLABLE_LIST_APPEND: { + _PyStackRef callable; + callable = stack_pointer[-3]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + if (callable_o != interp->callable_cache.list_append) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _CALL_LIST_APPEND: { _PyStackRef arg; _PyStackRef self; @@ -5929,18 +5952,8 @@ self = stack_pointer[-2]; callable = stack_pointer[-3]; assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - if (callable_o != interp->callable_cache.list_append) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (self_o == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (!PyList_Check(self_o)) { + if (!PyList_CheckExact(self_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b3f2a2067f7f11..cb9bfcbf2a75ac 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3475,58 +3475,79 @@ INSTRUCTION_STATS(CALL_LIST_APPEND); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); _PyStackRef callable; + _PyStackRef nos; _PyStackRef self; _PyStackRef arg; /* Skip 1 cache entry */ /* Skip 2 cache entries */ - arg = stack_pointer[-1]; - self = stack_pointer[-2]; - callable = stack_pointer[-3]; - assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - if (callable_o != interp->callable_cache.list_append) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (self_o == NULL) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (!PyList_Check(self_o)) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (!LOCK_OBJECT(self_o)) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - STAT_INC(CALL, hit); - int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - UNLOCK_OBJECT(self_o); - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(self); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(callable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (err) { - JUMP_TO_LABEL(error); + // _GUARD_CALLABLE_LIST_APPEND + { + callable = stack_pointer[-3]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + if (callable_o != interp->callable_cache.list_append) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } } - #if TIER_ONE + // _GUARD_NOS_NOT_NULL + { + nos = stack_pointer[-2]; + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (o == NULL) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + } + // _GUARD_NOS_LIST + { + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (!PyList_CheckExact(o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + } + // _CALL_LIST_APPEND + { + arg = stack_pointer[-1]; + self = nos; + assert(oparg == 1); + PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); + if (!PyList_CheckExact(self_o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + if (!LOCK_OBJECT(self_o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + STAT_INC(CALL, hit); + int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(self); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err) { + JUMP_TO_LABEL(error); + } + #if TIER_ONE - assert(next_instr->op.code == POP_TOP); - SKIP_OVER(1); - #endif + assert(next_instr->op.code == POP_TOP); + SKIP_OVER(1); + #endif + } DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index ad25c68e62aab2..708b436f6e4ae6 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1087,6 +1087,13 @@ dummy_func(void) { sym_set_null(null); } + op(_GUARD_NOS_NOT_NULL, (nos, unused -- nos, unused)) { + if (sym_is_not_null(nos)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_non_null(nos); + } + op(_GUARD_THIRD_NULL, (null, unused, unused -- null, unused, unused)) { if (sym_is_null(null)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1135,6 +1142,14 @@ dummy_func(void) { sym_set_const(callable, isinstance); } + op(_GUARD_CALLABLE_LIST_APPEND, (callable, unused, unused -- callable, unused, unused)) { + PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append; + if (sym_get_const(ctx, callable) == list_append) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, list_append); + } + // END BYTECODES // } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5d469765203833..3ac99e5c1b1707 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1935,6 +1935,16 @@ break; } + case _GUARD_NOS_NOT_NULL: { + JitOptSymbol *nos; + nos = stack_pointer[-2]; + if (sym_is_not_null(nos)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_non_null(nos); + break; + } + case _GUARD_THIRD_NULL: { JitOptSymbol *null; null = stack_pointer[-3]; @@ -2146,6 +2156,17 @@ break; } + case _GUARD_CALLABLE_LIST_APPEND: { + JitOptSymbol *callable; + callable = stack_pointer[-3]; + PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append; + if (sym_get_const(ctx, callable) == list_append) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, list_append); + break; + } + case _CALL_LIST_APPEND: { stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); From 470941782f74288823b445120f6383914b659f23 Mon Sep 17 00:00:00 2001 From: John Keith Hohm Date: Mon, 19 May 2025 16:48:55 -0400 Subject: [PATCH 216/989] gh-88994: Change `datetime.datetime.now` to half-even rounding (#134258) Change `datetime.datetime.now` to half-even rounding for consistency with `datetime.fromtimestamp`. --- Misc/ACKS | 1 + .../next/Library/2025-05-19-18-12-42.gh-issue-88994.7avvVu.rst | 3 +++ Modules/_datetimemodule.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-18-12-42.gh-issue-88994.7avvVu.rst diff --git a/Misc/ACKS b/Misc/ACKS index 5653c52c9e354e..1b500870dec472 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -795,6 +795,7 @@ Albert Hofkamp Chris Hogan Tomas Hoger Jonathan Hogg +John Keith Hohm Vladyslav Hoi Kamilla Holanda Steve Holden diff --git a/Misc/NEWS.d/next/Library/2025-05-19-18-12-42.gh-issue-88994.7avvVu.rst b/Misc/NEWS.d/next/Library/2025-05-19-18-12-42.gh-issue-88994.7avvVu.rst new file mode 100644 index 00000000000000..554a0c3bcb26bf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-18-12-42.gh-issue-88994.7avvVu.rst @@ -0,0 +1,3 @@ +Change :func:`datetime.datetime.now` to half-even rounding for +consistency with :func:`datetime.datetime.fromtimestamp`. Patch by +John Keith Hohm. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 313a72e3fe0668..eb90be81c8d34c 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5551,8 +5551,9 @@ datetime_best_possible(PyObject *cls, TM_FUNC f, PyObject *tzinfo) time_t secs; int us; - if (_PyTime_AsTimevalTime_t(ts, &secs, &us, _PyTime_ROUND_FLOOR) < 0) + if (_PyTime_AsTimevalTime_t(ts, &secs, &us, _PyTime_ROUND_HALF_EVEN) < 0) { return NULL; + } assert(0 <= us && us <= 999999); return datetime_from_timet_and_us(cls, f, secs, us, tzinfo); From 8421b03b16a4852a527256cb7cdce2ab2d318548 Mon Sep 17 00:00:00 2001 From: Tom Wang <85062819+tommix626@users.noreply.github.com> Date: Mon, 19 May 2025 17:21:30 -0400 Subject: [PATCH 217/989] gh-134235: Import Autocomplete for Builtin Modules (GH-134277) * added enhancement auto completing import with sys builtins --------- Co-authored-by: Hunter --- Lib/_pyrepl/_module_completer.py | 5 +++-- Lib/test/test_pyrepl/test_pyrepl.py | 20 +++++++++++++++++++ ...-05-19-15-05-24.gh-issue-134235.pz9PwV.rst | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-15-05-24.gh-issue-134235.pz9PwV.rst diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 347f05607c75c5..0606797226d1e0 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -81,8 +81,9 @@ def find_modules(self, path: str, prefix: str) -> list[str]: def _find_modules(self, path: str, prefix: str) -> list[str]: if not path: # Top-level import (e.g. `import foo`` or `from foo`)` - return [name for _, name, _ in self.global_cache - if name.startswith(prefix)] + builtin_modules = [name for name in sys.builtin_module_names if name.startswith(prefix)] + third_party_modules = [name for _, name, _ in self.global_cache if name.startswith(prefix)] + return sorted(builtin_modules + third_party_modules) if path.startswith('.'): # Convert relative path to absolute path diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index a1cfbe4c3cadb7..59f5d1f893f9fd 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -959,6 +959,26 @@ def test_import_completions(self): output = reader.readline() self.assertEqual(output, expected) + def test_builtin_completion_top_level(self): + import importlib + # Make iter_modules() search only the standard library. + # This makes the test more reliable in case there are + # other user packages/scripts on PYTHONPATH which can + # intefere with the completions. + lib_path = os.path.dirname(importlib.__path__[0]) + sys.path = [lib_path] + + cases = ( + ("import bui\t\n", "import builtins"), + ("from bui\t\n", "from builtins"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + def test_relative_import_completions(self): cases = ( ("from .readl\t\n", "from .readline"), diff --git a/Misc/NEWS.d/next/Library/2025-05-19-15-05-24.gh-issue-134235.pz9PwV.rst b/Misc/NEWS.d/next/Library/2025-05-19-15-05-24.gh-issue-134235.pz9PwV.rst new file mode 100644 index 00000000000000..a65df886919145 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-15-05-24.gh-issue-134235.pz9PwV.rst @@ -0,0 +1,2 @@ +Updated tab completion on REPL to include builtin modules. Contributed by +Tom Wang, Hunter Young From 1fbb0603a87669562e964cade336b3384778fbe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 19 May 2025 23:59:14 +0200 Subject: [PATCH 218/989] gh-131178: remove runtime tests for `http.server` CLI (#134287) The runtime behavior of `http.server` CLI is hard to test on an arbitrary platform. As such, tests asserting the correctness of `python -m http.server` are temporarily removed and will be rewritten later once a universal solution has been found. --- Lib/test/test_httpservers.py | 83 ------------------------------------ 1 file changed, 83 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 429d9005bd7605..0af1c45ecb2a8b 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -21,7 +21,6 @@ import html import http, http.client import urllib.parse -import urllib.request import tempfile import time import datetime @@ -34,8 +33,6 @@ from test.support import ( is_apple, import_helper, os_helper, threading_helper ) -from test.support.script_helper import kill_python, spawn_python -from test.support.socket_helper import find_unused_port try: import ssl @@ -1455,86 +1452,6 @@ def test_unknown_flag(self, _): self.assertIn('error', stderr.getvalue()) -class CommandLineRunTimeTestCase(unittest.TestCase): - served_data = os.urandom(32) - served_file_name = 'served_filename' - tls_cert = certdata_file('ssl_cert.pem') - tls_key = certdata_file('ssl_key.pem') - tls_password = 'somepass' - - def setUp(self): - super().setUp() - with open(self.served_file_name, 'wb') as f: - f.write(self.served_data) - self.addCleanup(os_helper.unlink, self.served_file_name) - self.tls_password_file = tempfile.mktemp() - with open(self.tls_password_file, 'wb') as f: - f.write(self.tls_password.encode()) - self.addCleanup(os_helper.unlink, self.tls_password_file) - - def fetch_file(self, path): - context = ssl.create_default_context() - # allow self-signed certificates - context.check_hostname = False - context.verify_mode = ssl.CERT_NONE - req = urllib.request.Request(path, method='GET') - with urllib.request.urlopen(req, context=context) as res: - return res.read() - - def parse_cli_output(self, output): - matches = re.search(r'\((https?)://([^/:]+):(\d+)/?\)', output) - if matches is None: - return None, None, None - return matches.group(1), matches.group(2), int(matches.group(3)) - - def wait_for_server(self, proc, protocol, port, bind, timeout=50): - """Check the server process output. - - Return True if the server was successfully started - and is listening on the given port and bind address. - """ - while timeout > 0: - line = proc.stdout.readline() - if not line: - time.sleep(0.1) - timeout -= 1 - continue - protocol_, host_, port_ = self.parse_cli_output(line) - if not protocol_ or not host_ or not port_: - time.sleep(0.1) - timeout -= 1 - continue - if protocol_ == protocol and host_ == bind and port_ == port: - return True - break - return False - - def test_http_client(self): - port = find_unused_port() - bind = '127.0.0.1' - proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, - bufsize=1, text=True) - self.addCleanup(kill_python, proc) - self.addCleanup(proc.terminate) - self.assertTrue(self.wait_for_server(proc, 'http', port, bind)) - res = self.fetch_file(f'http://{bind}:{port}/{self.served_file_name}') - self.assertEqual(res, self.served_data) - - def test_https_client(self): - port = find_unused_port() - bind = '127.0.0.1' - proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, - '--tls-cert', self.tls_cert, - '--tls-key', self.tls_key, - '--tls-password-file', self.tls_password_file, - bufsize=1, text=True) - self.addCleanup(kill_python, proc) - self.addCleanup(proc.terminate) - self.assertTrue(self.wait_for_server(proc, 'https', port, bind)) - res = self.fetch_file(f'https://{bind}:{port}/{self.served_file_name}') - self.assertEqual(res, self.served_data) - - def setUpModule(): unittest.addModuleCleanup(os.chdir, os.getcwd()) From a7f317d7300235d9efff5a0350b1bae14720d42f Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Mon, 19 May 2025 18:00:53 -0400 Subject: [PATCH 219/989] GH-131798: Add _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW (GH-134268) --- Include/internal/pycore_uop_ids.h | 83 ++++++++++--------- Include/internal/pycore_uop_metadata.h | 4 + Lib/test/test_capi/test_opt.py | 12 ++- ...-05-19-20-52-53.gh-issue-134268.HPKX1e.rst | 2 + Python/bytecodes.c | 9 ++ Python/executor_cases.c.h | 34 ++++++++ Python/optimizer_bytecodes.c | 12 ++- Python/optimizer_cases.c.h | 18 +++- 8 files changed, 121 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-20-52-53.gh-issue-134268.HPKX1e.rst diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 481ec9307db3b5..322f3cc093f7bb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -265,73 +265,74 @@ extern "C" { #define _MONITOR_JUMP_BACKWARD 481 #define _MONITOR_RESUME 482 #define _NOP NOP +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 483 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 483 -#define _POP_JUMP_IF_TRUE 484 +#define _POP_JUMP_IF_FALSE 484 +#define _POP_JUMP_IF_TRUE 485 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 485 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 486 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 487 +#define _POP_TOP_LOAD_CONST_INLINE 486 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 488 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 488 +#define _PUSH_FRAME 489 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 489 -#define _PY_FRAME_GENERAL 490 -#define _PY_FRAME_KW 491 -#define _QUICKEN_RESUME 492 -#define _REPLACE_WITH_TRUE 493 +#define _PUSH_NULL_CONDITIONAL 490 +#define _PY_FRAME_GENERAL 491 +#define _PY_FRAME_KW 492 +#define _QUICKEN_RESUME 493 +#define _REPLACE_WITH_TRUE 494 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 494 -#define _SEND 495 -#define _SEND_GEN_FRAME 496 +#define _SAVE_RETURN_OFFSET 495 +#define _SEND 496 +#define _SEND_GEN_FRAME 497 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 497 -#define _STORE_ATTR 498 -#define _STORE_ATTR_INSTANCE_VALUE 499 -#define _STORE_ATTR_SLOT 500 -#define _STORE_ATTR_WITH_HINT 501 +#define _START_EXECUTOR 498 +#define _STORE_ATTR 499 +#define _STORE_ATTR_INSTANCE_VALUE 500 +#define _STORE_ATTR_SLOT 501 +#define _STORE_ATTR_WITH_HINT 502 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 502 -#define _STORE_FAST_0 503 -#define _STORE_FAST_1 504 -#define _STORE_FAST_2 505 -#define _STORE_FAST_3 506 -#define _STORE_FAST_4 507 -#define _STORE_FAST_5 508 -#define _STORE_FAST_6 509 -#define _STORE_FAST_7 510 +#define _STORE_FAST 503 +#define _STORE_FAST_0 504 +#define _STORE_FAST_1 505 +#define _STORE_FAST_2 506 +#define _STORE_FAST_3 507 +#define _STORE_FAST_4 508 +#define _STORE_FAST_5 509 +#define _STORE_FAST_6 510 +#define _STORE_FAST_7 511 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 511 -#define _STORE_SUBSCR 512 -#define _STORE_SUBSCR_DICT 513 -#define _STORE_SUBSCR_LIST_INT 514 +#define _STORE_SLICE 512 +#define _STORE_SUBSCR 513 +#define _STORE_SUBSCR_DICT 514 +#define _STORE_SUBSCR_LIST_INT 515 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 515 -#define _TO_BOOL 516 +#define _TIER2_RESUME_CHECK 516 +#define _TO_BOOL 517 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 517 +#define _TO_BOOL_LIST 518 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 518 +#define _TO_BOOL_STR 519 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 519 -#define _UNPACK_SEQUENCE_LIST 520 -#define _UNPACK_SEQUENCE_TUPLE 521 -#define _UNPACK_SEQUENCE_TWO_TUPLE 522 +#define _UNPACK_SEQUENCE 520 +#define _UNPACK_SEQUENCE_LIST 521 +#define _UNPACK_SEQUENCE_TUPLE 522 +#define _UNPACK_SEQUENCE_TWO_TUPLE 523 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 522 +#define MAX_UOP_ID 523 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index d90b074b409f88..b334dd840c8fac 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -305,6 +305,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_START_EXECUTOR] = 0, [_MAKE_WARM] = 0, @@ -553,6 +554,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_MAYBE_EXPAND_METHOD] = "_MAYBE_EXPAND_METHOD", [_MAYBE_EXPAND_METHOD_KW] = "_MAYBE_EXPAND_METHOD_KW", [_NOP] = "_NOP", + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", @@ -1192,6 +1194,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _POP_TWO_LOAD_CONST_INLINE_BORROW: return 2; + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: + return 4; case _CHECK_FUNCTION: return 0; case _START_EXECUTOR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 305c53ffd816de..f7a4200e1eec0e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1955,9 +1955,10 @@ def testfunc(n): self.assertEqual(res, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) + self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_list_append(self): def testfunc(n): @@ -1987,9 +1988,10 @@ def testfunc(n): self.assertEqual(res, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) + self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_is_false(self): def testfunc(n): @@ -2004,9 +2006,10 @@ def testfunc(n): self.assertEqual(res, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_FALSE_POP", uops) + self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_subclass(self): def testfunc(n): @@ -2021,9 +2024,10 @@ def testfunc(n): self.assertEqual(res, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_CALL_ISINSTANCE", uops) + self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) + self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_unknown_object(self): def testfunc(n): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-20-52-53.gh-issue-134268.HPKX1e.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-20-52-53.gh-issue-134268.HPKX1e.rst new file mode 100644 index 00000000000000..98d770cf054926 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-19-20-52-53.gh-issue-134268.HPKX1e.rst @@ -0,0 +1,2 @@ +Add ``_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW`` and use it to further +optimize ``CALL_ISINSTANCE``. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c10327916fc867..477a84b170ab98 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5342,6 +5342,15 @@ dummy_func( value = PyStackRef_FromPyObjectImmortal(ptr); } + tier2 pure op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectImmortal(ptr); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyStackRef_FunctionCheck(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c5e481932e7b29..c3da7a63b9ed86 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7103,6 +7103,40 @@ break; } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectImmortal(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND0(); assert(PyStackRef_FunctionCheck(frame->f_funcobj)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 708b436f6e4ae6..9811e9f348c2a0 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -550,6 +550,10 @@ dummy_func(void) { value = sym_new_const(ctx, ptr); } + op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -901,12 +905,12 @@ dummy_func(void) { // known types, meaning we can deduce either True or False // The below check is equivalent to PyObject_TypeCheck(inst, cls) + PyObject *out = Py_False; if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { - sym_set_const(res, Py_True); - } - else { - sym_set_const(res, Py_False); + out = Py_True; } + sym_set_const(res, out); + REPLACE_OP(this_instr, _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)out); } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3ac99e5c1b1707..6e19319fbaef4a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2143,12 +2143,12 @@ PyTypeObject *inst_type = sym_get_type(instance); PyTypeObject *cls_o = (PyTypeObject *)sym_get_const(ctx, cls); if (inst_type && cls_o && sym_matches_type(cls, &PyType_Type)) { + PyObject *out = Py_False; if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { - sym_set_const(res, Py_True); - } - else { - sym_set_const(res, Py_False); + out = Py_True; } + sym_set_const(res, out); + REPLACE_OP(this_instr, _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)out); } stack_pointer[-4] = res; stack_pointer += -3; @@ -2556,6 +2556,16 @@ break; } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-4] = value; + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { break; } From 46d7c114d8cd25e5135bcdf2ea799eea43a41a67 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Mon, 19 May 2025 23:06:04 +0100 Subject: [PATCH 220/989] gh-132983: Add zstd version info to `test.pythoninfo` (#134230) Co-authored-by: Victor Stinner --- Lib/test/pythoninfo.py | 11 +++++++++++ .../2025-05-19-15-30-00.gh-issue-132983.asdsfs.rst | 1 + 2 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-15-30-00.gh-issue-132983.asdsfs.rst diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index 682815c3fdd6e0..e1830f2e6eba97 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -658,6 +658,16 @@ def collect_zlib(info_add): copy_attributes(info_add, zlib, 'zlib.%s', attributes) +def collect_zstd(info_add): + try: + import _zstd + except ImportError: + return + + attributes = ('zstd_version',) + copy_attributes(info_add, _zstd, 'zstd.%s', attributes) + + def collect_expat(info_add): try: from xml.parsers import expat @@ -1051,6 +1061,7 @@ def collect_info(info): collect_tkinter, collect_windows, collect_zlib, + collect_zstd, collect_libregrtest_utils, # Collecting from tests should be last as they have side effects. diff --git a/Misc/NEWS.d/next/Library/2025-05-19-15-30-00.gh-issue-132983.asdsfs.rst b/Misc/NEWS.d/next/Library/2025-05-19-15-30-00.gh-issue-132983.asdsfs.rst new file mode 100644 index 00000000000000..3893eeafa9c594 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-15-30-00.gh-issue-132983.asdsfs.rst @@ -0,0 +1 @@ +Add :mod:`!compression.zstd` version information to ``test.pythoninfo``. From 66aaad61037785639aec393be7618cb54b1372dc Mon Sep 17 00:00:00 2001 From: Alek Binion Date: Mon, 19 May 2025 19:59:06 -0400 Subject: [PATCH 221/989] gh-134201: Expand explanation of Base85 encodings in base64 docs (#134288) Explain history of de-facto standard and how to pick between the two Base-85 encoding functions in the base-64 module. --------- Co-authored-by: C.A.M. Gerlach --- Doc/library/base64.rst | 48 ++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 834ab2536e6c14..529a7242443820 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -15,14 +15,9 @@ This module provides functions for encoding binary data to printable ASCII characters and decoding such encodings back to binary data. -It provides encoding and decoding functions for the encodings specified in -:rfc:`4648`, which defines the Base16, Base32, and Base64 algorithms, -and for the de-facto standard Ascii85 and Base85 encodings. - -The :rfc:`4648` encodings are suitable for encoding binary data so that it can be -safely sent by email, used as parts of URLs, or included as part of an HTTP -POST request. The encoding algorithm is not the same as the -:program:`uuencode` program. +This includes the :ref:`encodings specified in ` +:rfc:`4648` (Base64, Base32 and Base16) +and the non-standard :ref:`Base85 encodings `. There are two interfaces provided by this module. The modern interface supports encoding :term:`bytes-like objects ` to ASCII @@ -30,7 +25,7 @@ supports encoding :term:`bytes-like objects ` to ASCII strings containing ASCII to :class:`bytes`. Both base-64 alphabets defined in :rfc:`4648` (normal, and URL- and filesystem-safe) are supported. -The legacy interface does not support decoding from strings, but it does +The :ref:`legacy interface ` does not support decoding from strings, but it does provide functions for encoding and decoding to and from :term:`file objects `. It only supports the Base64 standard alphabet, and it adds newlines every 76 characters as per :rfc:`2045`. Note that if you are looking @@ -46,7 +41,15 @@ package instead. Any :term:`bytes-like objects ` are now accepted by all encoding and decoding functions in this module. Ascii85/Base85 support added. -The modern interface provides: + +.. _base64-rfc-4648: + +RFC 4648 Encodings +------------------ + +The :rfc:`4648` encodings are suitable for encoding binary data so that it can be +safely sent by email, used as parts of URLs, or included as part of an HTTP +POST request. .. function:: b64encode(s, altchars=None) @@ -181,6 +184,26 @@ The modern interface provides: incorrectly padded or if there are non-alphabet characters present in the input. +.. _base64-base-85: + +Base85 Encodings +----------------- + +Base85 encoding is not formally specified but rather a de facto standard, +thus different systems perform the encoding differently. + +The :func:`a85encode` and :func:`b85encode` functions in this module are two implementations of +the de facto standard. You should call the function with the Base85 +implementation used by the software you intend to work with. + +The two functions present in this module differ in how they handle the following: + +* Whether to include enclosing ``<~`` and ``~>`` markers +* Whether to include newline characters +* The set of ASCII characters used for encoding +* Handling of null bytes + +Refer to the documentation of the individual functions for more information. .. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -262,7 +285,10 @@ The modern interface provides: .. versionadded:: 3.13 -The legacy interface: +.. _base64-legacy: + +Legacy Interface +---------------- .. function:: decode(input, output) From e3dda8f81832008adf19906004f0cd53de95dd0b Mon Sep 17 00:00:00 2001 From: Gustaf <79180496+GGyll@users.noreply.github.com> Date: Mon, 19 May 2025 20:54:48 -0400 Subject: [PATCH 222/989] gh-133940: test_strftime incorrectly calculates expected week (GH-134281) Let the system determine the correct tm_wday and tm_isdst. --- Lib/test/test_strftime.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_strftime.py b/Lib/test/test_strftime.py index 752e31359cf206..375f6aaedd8934 100644 --- a/Lib/test/test_strftime.py +++ b/Lib/test/test_strftime.py @@ -39,7 +39,21 @@ def _update_variables(self, now): if now[3] < 12: self.ampm='(AM|am)' else: self.ampm='(PM|pm)' - self.jan1 = time.localtime(time.mktime((now[0], 1, 1, 0, 0, 0, 0, 1, 0))) + jan1 = time.struct_time( + ( + now.tm_year, # Year + 1, # Month (January) + 1, # Day (1st) + 0, # Hour (0) + 0, # Minute (0) + 0, # Second (0) + -1, # tm_wday (will be determined) + 1, # tm_yday (day 1 of the year) + -1, # tm_isdst (let the system determine) + ) + ) + # use mktime to get the correct tm_wday and tm_isdst values + self.jan1 = time.localtime(time.mktime(jan1)) try: if now[8]: self.tz = time.tzname[1] From 652d6938ef8c42c1c4c180c3f0e257c26c6677da Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 20 May 2025 13:33:54 +0900 Subject: [PATCH 223/989] gh-133374: fix test_python_legacy_windows_stdio (GH-134080) --- Lib/test/test_cmd_line.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 1b40e0d05fe3bc..f540973c11e01c 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -972,10 +972,25 @@ def test_python_legacy_windows_fs_encoding(self): @unittest.skipUnless(support.MS_WINDOWS, 'Test only applicable on Windows') def test_python_legacy_windows_stdio(self): - code = "import sys; print(sys.stdin.encoding, sys.stdout.encoding)" - expected = 'cp' - rc, out, err = assert_python_ok('-c', code, PYTHONLEGACYWINDOWSSTDIO='1') - self.assertIn(expected.encode(), out) + # Test that _WindowsConsoleIO is used when PYTHONLEGACYWINDOWSSTDIO + # is not set. + # We cannot use PIPE becase it prevents creating new console. + # So we use exit code. + code = "import sys; sys.exit(type(sys.stdout.buffer.raw).__name__ != '_WindowsConsoleIO')" + env = os.environ.copy() + env["PYTHONLEGACYWINDOWSSTDIO"] = "" + p = subprocess.run([sys.executable, "-c", code], + creationflags=subprocess.CREATE_NEW_CONSOLE, + env=env) + self.assertEqual(p.returncode, 0) + + # Then test that FIleIO is used when PYTHONLEGACYWINDOWSSTDIO is set. + code = "import sys; sys.exit(type(sys.stdout.buffer.raw).__name__ != 'FileIO')" + env["PYTHONLEGACYWINDOWSSTDIO"] = "1" + p = subprocess.run([sys.executable, "-c", code], + creationflags=subprocess.CREATE_NEW_CONSOLE, + env=env) + self.assertEqual(p.returncode, 0) @unittest.skipIf("-fsanitize" in sysconfig.get_config_vars().get('PY_CFLAGS', ()), "PYTHONMALLOCSTATS doesn't work with ASAN") From e29171bf8a26b5faf97222e07a7d5f33c9eb272b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 20 May 2025 10:51:44 +0300 Subject: [PATCH 224/989] Clean up test_posixpath (GH-134315) * Ensure that created files and dirs are always removed after test. Now addCleanup() does not conflict with tearDown(). * Use os_helper.unlink() and os_helper.rmdir(). * Import TESTFN from os_helper. --- Lib/test/test_posixpath.py | 86 ++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 45 deletions(-) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index c70688ff5e9bec..f3f9895f529470 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -9,7 +9,7 @@ from test import test_genericpath from test.support import import_helper from test.support import os_helper -from test.support.os_helper import FakePath +from test.support.os_helper import FakePath, TESTFN from unittest import mock try: @@ -21,7 +21,7 @@ # An absolute path to a temporary filename for testing. We can't rely on TESTFN # being an absolute path, so we need this. -ABSTFN = abspath(os_helper.TESTFN) +ABSTFN = abspath(TESTFN) def skip_if_ABSTFN_contains_backslash(test): """ @@ -33,21 +33,11 @@ def skip_if_ABSTFN_contains_backslash(test): msg = "ABSTFN is not a posix path - tests fail" return [test, unittest.skip(msg)(test)][found_backslash] -def safe_rmdir(dirname): - try: - os.rmdir(dirname) - except OSError: - pass - class PosixPathTest(unittest.TestCase): def setUp(self): - self.tearDown() - - def tearDown(self): for suffix in ["", "1", "2"]: - os_helper.unlink(os_helper.TESTFN + suffix) - safe_rmdir(os_helper.TESTFN + suffix) + self.assertFalse(posixpath.lexists(ABSTFN + suffix)) def test_join(self): fn = posixpath.join @@ -194,25 +184,28 @@ def test_dirname(self): self.assertEqual(posixpath.dirname(b"//foo//bar"), b"//foo") def test_islink(self): - self.assertIs(posixpath.islink(os_helper.TESTFN + "1"), False) - self.assertIs(posixpath.lexists(os_helper.TESTFN + "2"), False) + self.assertIs(posixpath.islink(TESTFN + "1"), False) + self.assertIs(posixpath.lexists(TESTFN + "2"), False) - with open(os_helper.TESTFN + "1", "wb") as f: + self.addCleanup(os_helper.unlink, TESTFN + "1") + with open(TESTFN + "1", "wb") as f: f.write(b"foo") - self.assertIs(posixpath.islink(os_helper.TESTFN + "1"), False) + self.assertIs(posixpath.islink(TESTFN + "1"), False) if os_helper.can_symlink(): - os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN + "2") - self.assertIs(posixpath.islink(os_helper.TESTFN + "2"), True) - os.remove(os_helper.TESTFN + "1") - self.assertIs(posixpath.islink(os_helper.TESTFN + "2"), True) - self.assertIs(posixpath.exists(os_helper.TESTFN + "2"), False) - self.assertIs(posixpath.lexists(os_helper.TESTFN + "2"), True) - - self.assertIs(posixpath.islink(os_helper.TESTFN + "\udfff"), False) - self.assertIs(posixpath.islink(os.fsencode(os_helper.TESTFN) + b"\xff"), False) - self.assertIs(posixpath.islink(os_helper.TESTFN + "\x00"), False) - self.assertIs(posixpath.islink(os.fsencode(os_helper.TESTFN) + b"\x00"), False) + self.addCleanup(os_helper.unlink, TESTFN + "2") + os.symlink(TESTFN + "1", TESTFN + "2") + self.assertIs(posixpath.islink(TESTFN + "2"), True) + os.remove(TESTFN + "1") + self.assertIs(posixpath.islink(TESTFN + "2"), True) + self.assertIs(posixpath.exists(TESTFN + "2"), False) + self.assertIs(posixpath.lexists(TESTFN + "2"), True) + + def test_islink_invalid_paths(self): + self.assertIs(posixpath.islink(TESTFN + "\udfff"), False) + self.assertIs(posixpath.islink(os.fsencode(TESTFN) + b"\xff"), False) + self.assertIs(posixpath.islink(TESTFN + "\x00"), False) + self.assertIs(posixpath.islink(os.fsencode(TESTFN) + b"\x00"), False) def test_ismount(self): self.assertIs(posixpath.ismount("/"), True) @@ -227,7 +220,7 @@ def test_ismount_non_existent(self): os.mkdir(ABSTFN) self.assertIs(posixpath.ismount(ABSTFN), False) finally: - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) def test_ismount_invalid_paths(self): self.assertIs(posixpath.ismount('/\udfff'), False) @@ -242,7 +235,7 @@ def test_ismount_symlinks(self): os.symlink("/", ABSTFN) self.assertIs(posixpath.ismount(ABSTFN), False) finally: - os.unlink(ABSTFN) + os_helper.unlink(ABSTFN) @unittest.skipIf(posix is None, "Test requires posix module") def test_ismount_different_device(self): @@ -576,10 +569,10 @@ def test_realpath_relative(self): @skip_if_ABSTFN_contains_backslash def test_realpath_missing_pardir(self): try: - os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN) - self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1") + os.symlink(TESTFN + "1", TESTFN) + self.assertEqual(realpath("nonexistent/../" + TESTFN), ABSTFN + "1") finally: - os_helper.unlink(os_helper.TESTFN) + os_helper.unlink(TESTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -675,7 +668,7 @@ def test_realpath_repeated_indirect_symlinks(self): finally: os_helper.unlink(ABSTFN + '/self') os_helper.unlink(ABSTFN + '/link') - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -694,7 +687,7 @@ def test_realpath_deep_recursion(self): finally: for i in range(depth + 1): os_helper.unlink(ABSTFN + '/%d' % i) - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -712,8 +705,8 @@ def test_realpath_resolve_parents(self): self.assertEqual(realpath("a"), ABSTFN + "/y/a") finally: os_helper.unlink(ABSTFN + "/k") - safe_rmdir(ABSTFN + "/y") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/y") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -739,9 +732,9 @@ def test_realpath_resolve_before_normalizing(self): ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "/link-y") - safe_rmdir(ABSTFN + "/k/y") - safe_rmdir(ABSTFN + "/k") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/k/y") + os_helper.rmdir(ABSTFN + "/k") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -759,8 +752,8 @@ def test_realpath_resolve_first(self): self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "link") - safe_rmdir(ABSTFN + "/k") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/k") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -778,7 +771,7 @@ def test_realpath_unreadable_symlink(self): realpath(ABSTFN, strict=True) finally: os.chmod(ABSTFN, 0o755, follow_symlinks=False) - os.unlink(ABSTFN) + os_helper.unlink(ABSTFN) @skip_if_ABSTFN_contains_backslash def test_realpath_nonterminal_file(self): @@ -817,6 +810,7 @@ def test_realpath_nonterminal_symlink_to_file(self): self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) finally: os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN + "1") @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -838,6 +832,8 @@ def test_realpath_nonterminal_symlink_to_symlinks_to_file(self): self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) finally: os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN + "1") + os_helper.unlink(ABSTFN + "2") def test_relpath(self): (real_getcwd, os.getcwd) = (os.getcwd, lambda: r"/home/user/bar") @@ -963,8 +959,8 @@ class PathLikeTests(unittest.TestCase): path = posixpath def setUp(self): - self.file_name = os_helper.TESTFN - self.file_path = FakePath(os_helper.TESTFN) + self.file_name = TESTFN + self.file_path = FakePath(TESTFN) self.addCleanup(os_helper.unlink, self.file_name) with open(self.file_name, 'xb', 0) as file: file.write(b"test_posixpath.PathLikeTests") From 28625d4f956f8d30671aba1daaac9735932983db Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 20 May 2025 04:41:22 -0400 Subject: [PATCH 225/989] gh-128307: Update what's new in 3.13 and 3.14 with create_task changes of asyncio (#134304) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/whatsnew/3.13.rst | 16 ++++++++++++++++ Doc/whatsnew/3.14.rst | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 023c279979d842..580a3d8154dee1 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -730,6 +730,22 @@ asyncio never awaited). (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.) +* The function and methods named ``create_task`` have received a new + ``**kwargs`` argument that is passed through to the task constructor. + This change was accidentally added in 3.13.3, + and broke the API contract for custom task factories. + Several third-party task factories implemented workarounds for this. + In 3.13.4 and later releases the old factory contract is honored + once again (until 3.14). + To keep the workarounds working, the extra ``**kwargs`` argument still + allows passing additional keyword arguments to :class:`~asyncio.Task` + and to custom task factories. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) base64 ------ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8f39b99e38e3a5..b192615e173f11 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1064,6 +1064,24 @@ ast (Contributed by Semyon Moroz in :gh:`133367`.) +asyncio +------- + +* The function and methods named :func:`!create_task` now take an arbitrary + list of keyword arguments. All keyword arguments are passed to the + :class:`~asyncio.Task` constructor or the custom task factory. + (See :meth:`~asyncio.loop.set_task_factory` for details.) + The ``name`` and ``context`` keyword arguments are no longer special; + the name should now be set using the ``name`` keyword argument of the factory, + and ``context`` may be ``None``. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) + + bdb --- From e007e62ba7403575611145aa2e39900b5f4bc6bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 20 May 2025 11:15:39 +0200 Subject: [PATCH 226/989] gh-125843: fix `test_curses.test_attributes` on x86-64 macOS (#134252) While some `libcurses` functions are meant to return OK on success, this is not always the case for all implementations. As such, we relax the checks on the return values and allow any non-ERR value to be considered equivalent to OK. --- Modules/_cursesmodule.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index ab63fdbe45de61..2e6ec822e2d5b6 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -392,9 +392,9 @@ _PyCursesStatefulCheckFunction(PyObject *module, */ /* - * Return None if 'code' is OK. Otherwise, set an exception - * using curses_set_error() and the remaining arguments, and - * return NULL. + * Return None if 'code' is different from ERR (implementation-defined). + * Otherwise, set an exception using curses_set_error() and the remaining + * arguments, and return NULL. */ static PyObject * curses_check_err(PyObject *module, int code, @@ -402,7 +402,6 @@ curses_check_err(PyObject *module, int code, const char *python_funcname) { if (code != ERR) { - assert(code == OK); Py_RETURN_NONE; } curses_set_error(module, curses_funcname, python_funcname); @@ -416,7 +415,6 @@ curses_window_check_err(PyCursesWindowObject *win, int code, const char *python_funcname) { if (code != ERR) { - assert(code == OK); Py_RETURN_NONE; } curses_window_set_error(win, curses_funcname, python_funcname); @@ -1580,7 +1578,7 @@ _curses_window_derwin_impl(PyCursesWindowObject *self, int group_left_1, win = derwin(self->win,nlines,ncols,begin_y,begin_x); if (win == NULL) { - curses_window_set_error(self, "derwin", NULL); + curses_window_set_null_error(self, "derwin", NULL); return NULL; } From 175ba3639fbc336912f429cf4de43b22e05fd6dc Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 20 May 2025 12:47:27 +0300 Subject: [PATCH 227/989] gh-72902: improve Fraction constructor speed for typical inputs (GH-134320) This moves abc check for numbers.Rational - down. --- Lib/fractions.py | 10 +++++----- .../2025-05-20-11-35-08.gh-issue-72902.jzEI-E.rst | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-20-11-35-08.gh-issue-72902.jzEI-E.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index 8163e3bb594f6b..063f28478c7338 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -238,11 +238,6 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = 1 return self - elif isinstance(numerator, numbers.Rational): - self._numerator = numerator.numerator - self._denominator = numerator.denominator - return self - elif (isinstance(numerator, float) or (not isinstance(numerator, type) and hasattr(numerator, 'as_integer_ratio'))): @@ -278,6 +273,11 @@ def __new__(cls, numerator=0, denominator=None): if m.group('sign') == '-': numerator = -numerator + elif isinstance(numerator, numbers.Rational): + self._numerator = numerator.numerator + self._denominator = numerator.denominator + return self + else: raise TypeError("argument should be a string or a Rational " "instance or have the as_integer_ratio() method") diff --git a/Misc/NEWS.d/next/Library/2025-05-20-11-35-08.gh-issue-72902.jzEI-E.rst b/Misc/NEWS.d/next/Library/2025-05-20-11-35-08.gh-issue-72902.jzEI-E.rst new file mode 100644 index 00000000000000..932e751a32a043 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-20-11-35-08.gh-issue-72902.jzEI-E.rst @@ -0,0 +1,2 @@ +Improve speed (x1.1-1.8) of the :class:`~fractions.Fraction` constructor for +typical inputs (:class:`float`'s, :class:`~decimal.Decimal`'s or strings). From a31bbc951a9d74cd7b9092555c101e51a2b9482b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 20 May 2025 15:08:40 +0300 Subject: [PATCH 228/989] gh-53189: Document peculiarities of InteractiveConsole in relation to pickle (GH-123069) Co-authored-by: Terry Jan Reedy --- Doc/library/code.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/code.rst b/Doc/library/code.rst index 8f7692df9fb22d..52587c4dd8f8e8 100644 --- a/Doc/library/code.rst +++ b/Doc/library/code.rst @@ -22,6 +22,12 @@ build applications which provide an interactive interpreter prompt. it defaults to a newly created dictionary with key ``'__name__'`` set to ``'__console__'`` and key ``'__doc__'`` set to ``None``. + Note that functions and classes objects created under an + :class:`!InteractiveInterpreter` instance will belong to the namespace + specified by *locals*. + They are only pickleable if *locals* is the namespace of an existing + module. + .. class:: InteractiveConsole(locals=None, filename="", local_exit=False) From 306f9e04e50c80f7be2a38fc3b64accf59f5164c Mon Sep 17 00:00:00 2001 From: abstractedfox Date: Tue, 20 May 2025 08:24:27 -0400 Subject: [PATCH 229/989] gh-131357: Add tests for zero-sized bytes objects in test_bytes.py (#134234) Co-authored-by: Victor Stinner --- Lib/test/test_capi/test_bytes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 5b61c73381542d..86d5692893fc98 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -22,6 +22,7 @@ def test_check(self): # Test PyBytes_Check() check = _testlimitedcapi.bytes_check self.assertTrue(check(b'abc')) + self.assertTrue(check(b'')) self.assertFalse(check('abc')) self.assertFalse(check(bytearray(b'abc'))) self.assertTrue(check(BytesSubclass(b'abc'))) @@ -36,6 +37,7 @@ def test_checkexact(self): # Test PyBytes_CheckExact() check = _testlimitedcapi.bytes_checkexact self.assertTrue(check(b'abc')) + self.assertTrue(check(b'')) self.assertFalse(check('abc')) self.assertFalse(check(bytearray(b'abc'))) self.assertFalse(check(BytesSubclass(b'abc'))) @@ -108,6 +110,7 @@ def test_asstring(self): self.assertEqual(asstring(b'abc', 4), b'abc\0') self.assertEqual(asstring(b'abc\0def', 8), b'abc\0def\0') + self.assertEqual(asstring(b'', 1), b'\0') self.assertRaises(TypeError, asstring, 'abc', 0) self.assertRaises(TypeError, asstring, object(), 0) @@ -120,6 +123,7 @@ def test_asstringandsize(self): self.assertEqual(asstringandsize(b'abc', 4), (b'abc\0', 3)) self.assertEqual(asstringandsize(b'abc\0def', 8), (b'abc\0def\0', 7)) + self.assertEqual(asstringandsize(b'', 1), (b'\0', 0)) self.assertEqual(asstringandsize_null(b'abc', 4), b'abc\0') self.assertRaises(ValueError, asstringandsize_null, b'abc\0def', 8) self.assertRaises(TypeError, asstringandsize, 'abc', 0) @@ -163,6 +167,7 @@ def test_concat(self, concat=None): self.assertEqual(concat(b'', bytearray(b'def')), b'def') self.assertEqual(concat(memoryview(b'xabcy')[1:4], b'def'), b'abcdef') self.assertEqual(concat(b'abc', memoryview(b'xdefy')[1:4]), b'abcdef') + self.assertEqual(concat(b'', b''), b'') self.assertEqual(concat(b'abc', b'def', True), b'abcdef') self.assertEqual(concat(b'abc', bytearray(b'def'), True), b'abcdef') From 3246ea514d6da6ff09f411e22b3ba61a7de84a74 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Tue, 20 May 2025 08:25:50 -0400 Subject: [PATCH 230/989] gh-75459: Doc: C API: Improve object life cycle documentation (GH-125962) * Add "cyclic isolate" to the glossary. * Add a new "Object Life Cycle" page. * Improve docs for related API, with special focus on cross-references and warnings Co-authored-by: Petr Viktorin Co-authored-by: Peter Bierma --- .gitattributes | 2 + Doc/c-api/allocation.rst | 132 +++++++++-- Doc/c-api/gcsupport.rst | 57 +++++ Doc/c-api/lifecycle.dot | 156 ++++++++++++ Doc/c-api/lifecycle.dot.css | 21 ++ Doc/c-api/lifecycle.dot.pdf | Bin 0 -> 19328 bytes Doc/c-api/lifecycle.dot.svg | 374 +++++++++++++++++++++++++++++ Doc/c-api/lifecycle.rst | 273 +++++++++++++++++++++ Doc/c-api/memory.rst | 18 ++ Doc/c-api/objimpl.rst | 1 + Doc/c-api/type.rst | 25 +- Doc/c-api/typeobj.rst | 458 +++++++++++++++++++++++++++--------- Doc/glossary.rst | 6 + 13 files changed, 1386 insertions(+), 137 deletions(-) create mode 100644 Doc/c-api/lifecycle.dot create mode 100644 Doc/c-api/lifecycle.dot.css create mode 100644 Doc/c-api/lifecycle.dot.pdf create mode 100644 Doc/c-api/lifecycle.dot.svg create mode 100644 Doc/c-api/lifecycle.rst diff --git a/.gitattributes b/.gitattributes index 2f5a030981fb94..5682b9150a36e2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,6 +10,7 @@ *.ico binary *.jpg binary *.pck binary +*.pdf binary *.png binary *.psd binary *.tar binary @@ -67,6 +68,7 @@ PCbuild/readme.txt dos **/clinic/*.cpp.h generated **/clinic/*.h.h generated *_db.h generated +Doc/c-api/lifecycle.dot.svg generated Doc/data/stable_abi.dat generated Doc/library/token-list.inc generated Include/internal/pycore_ast.h generated diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 7cbc99ad145ad4..f8d01a3f29b30e 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -16,7 +16,20 @@ Allocating Objects on the Heap Initialize a newly allocated object *op* with its type and initial reference. Returns the initialized object. Other fields of the object are - not affected. + not initialized. Despite its name, this function is unrelated to the + object's :meth:`~object.__init__` method (:c:member:`~PyTypeObject.tp_init` + slot). Specifically, this function does **not** call the object's + :meth:`!__init__` method. + + In general, consider this function to be a low-level routine. Use + :c:member:`~PyTypeObject.tp_alloc` where possible. + For implementing :c:member:`!tp_alloc` for your type, prefer + :c:func:`PyType_GenericAlloc` or :c:func:`PyObject_New`. + + .. note:: + + This function only initializes the object's memory corresponding to the + initial :c:type:`PyObject` structure. It does not zero the rest. .. c:function:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) @@ -24,38 +37,107 @@ Allocating Objects on the Heap This does everything :c:func:`PyObject_Init` does, and also initializes the length information for a variable-size object. + .. note:: + + This function only initializes some of the object's memory. It does not + zero the rest. + .. c:macro:: PyObject_New(TYPE, typeobj) - Allocate a new Python object using the C structure type *TYPE* - and the Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header are not initialized. - The caller will own the only reference to the object - (i.e. its reference count will be one). - The size of the memory allocation is determined from the - :c:member:`~PyTypeObject.tp_basicsize` field of the type object. + Allocates a new Python object using the C structure type *TYPE* and the + Python type object *typeobj* (``PyTypeObject*``) by calling + :c:func:`PyObject_Malloc` to allocate memory and initializing it like + :c:func:`PyObject_Init`. The caller will own the only reference to the + object (i.e. its reference count will be one). + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This macro does not call :c:member:`~PyTypeObject.tp_alloc`, + :c:member:`~PyTypeObject.tp_new` (:meth:`~object.__new__`), or + :c:member:`~PyTypeObject.tp_init` (:meth:`~object.__init__`). + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_New` instead. + + Memory allocated by this macro must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *foo = PyObject_CallNoArgs((PyObject *)&PyFoo_Type); - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_New` instead. + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:macro:: PyObject_NewVar(TYPE, typeobj, size) - Allocate a new Python object using the C structure type *TYPE* and the - Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header - are not initialized. The allocated memory allows for the *TYPE* structure - plus *size* (``Py_ssize_t``) fields of the size - given by the :c:member:`~PyTypeObject.tp_itemsize` field of - *typeobj*. This is useful for implementing objects like tuples, which are - able to determine their size at construction time. Embedding the array of - fields into the same allocation decreases the number of allocations, - improving the memory management efficiency. - - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_NewVar` instead. + Like :c:macro:`PyObject_New` except: + + * It allocates enough memory for the *TYPE* structure plus *size* + (``Py_ssize_t``) fields of the size given by the + :c:member:`~PyTypeObject.tp_itemsize` field of *typeobj*. + * The memory is initialized like :c:func:`PyObject_InitVar`. + + This is useful for implementing objects like tuples, which are able to + determine their size at construction time. Embedding the array of fields + into the same allocation decreases the number of allocations, improving the + memory management efficiency. + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_NewVar` + instead. + + Memory allocated by this function must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *list_instance = PyObject_CallNoArgs((PyObject *)&PyList_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:function:: void PyObject_Del(void *op) diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index 3e23605778f05a..f6fa52b36c5ab3 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -57,11 +57,49 @@ rules: Analogous to :c:macro:`PyObject_New` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:macro:: PyObject_GC_NewVar(TYPE, typeobj, size) Analogous to :c:macro:`PyObject_NewVar` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:function:: PyObject* PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *type, size_t extra_size) Analogous to :c:macro:`PyObject_GC_New` but allocates *extra_size* @@ -73,6 +111,10 @@ rules: The extra data will be deallocated with the object, but otherwise it is not managed by Python. + Memory allocated by this function must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + .. warning:: The function is marked as unstable because the final mechanism for reserving extra data after an instance is not yet decided. @@ -136,6 +178,21 @@ rules: Releases memory allocated to an object using :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_New`, + :c:macro:`PyObject_NewVar`, or related allocation functions; use + :c:func:`PyObject_Free` instead. + + .. seealso:: + + * :c:func:`PyObject_Free` is the non-GC equivalent of this function. + * :c:macro:`PyObject_GC_New` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. c:function:: void PyObject_GC_UnTrack(void *op) diff --git a/Doc/c-api/lifecycle.dot b/Doc/c-api/lifecycle.dot new file mode 100644 index 00000000000000..dca9f87e9e0aca --- /dev/null +++ b/Doc/c-api/lifecycle.dot @@ -0,0 +1,156 @@ +digraph "Life Events" { + graph [ + fontnames="svg" + fontsize=12.0 + id="life_events_graph" + layout="dot" + margin="0,0" + ranksep=0.25 + stylesheet="lifecycle.dot.css" + ] + node [ + fontname="Courier" + fontsize=12.0 + ] + edge [ + fontname="Times-Italic" + fontsize=12.0 + ] + + "start" [fontname="Times-Italic" shape=plain label=< start > style=invis] + { + rank="same" + "tp_new" [href="typeobj.html#c.PyTypeObject.tp_new" target="_top"] + "tp_alloc" [href="typeobj.html#c.PyTypeObject.tp_alloc" target="_top"] + } + "tp_init" [href="typeobj.html#c.PyTypeObject.tp_init" target="_top"] + "reachable" [fontname="Times-Italic" shape=box] + "tp_traverse" [ + href="typeobj.html#c.PyTypeObject.tp_traverse" + ordering="in" + target="_top" + ] + "finalized?" [ + fontname="Times-Italic" + label=finalized?> + ordering="in" + shape=diamond + tooltip="marked as finalized?" + ] + "tp_finalize" [ + href="typeobj.html#c.PyTypeObject.tp_finalize" + ordering="in" + target="_top" + ] + "tp_clear" [href="typeobj.html#c.PyTypeObject.tp_clear" target="_top"] + "uncollectable" [ + fontname="Times-Italic" + label=(leaked)> + shape=box + tooltip="uncollectable (leaked)" + ] + "tp_dealloc" [ + href="typeobj.html#c.PyTypeObject.tp_dealloc" + ordering="in" + target="_top" + ] + "tp_free" [href="typeobj.html#c.PyTypeObject.tp_free" target="_top"] + + "start" -> "tp_new" [ + label=< type call > + ] + "tp_new" -> "tp_alloc" [ + label=< direct call > arrowhead=empty + labeltooltip="tp_new to tp_alloc: direct call" + tooltip="tp_new to tp_alloc: direct call" + ] + "tp_new" -> "tp_init" [tooltip="tp_new to tp_init"] + "tp_init" -> "reachable" [tooltip="tp_init to reachable"] + "reachable" -> "tp_traverse" [ + dir="back" + label=< not in a
cyclic
isolate > + labeltooltip="tp_traverse to reachable: not in a cyclic isolate" + tooltip="tp_traverse to reachable: not in a cyclic isolate" + ] + "reachable" -> "tp_traverse" [ + label=< periodic
cyclic isolate
detection > + labeltooltip="reachable to tp_traverse: periodic cyclic isolate detection" + tooltip="reachable to tp_traverse: periodic cyclic isolate detection" + ] + "reachable" -> "tp_init" [tooltip="reachable to tp_init"] + "reachable" -> "tp_finalize" [ + dir="back" + label=< resurrected
(maybe remove
finalized mark) > + labeltooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + tooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + ] + "tp_traverse" -> "finalized?" [ + label=< cyclic
isolate > + labeltooltip="tp_traverse to finalized?: cyclic isolate" + tooltip="tp_traverse to finalized?: cyclic isolate" + ] + "reachable" -> "finalized?" [ + label=< no refs > + labeltooltip="reachable to finalized?: no refs" + tooltip="reachable to finalized?: no refs" + ] + "finalized?" -> "tp_finalize" [ + label=< no (mark
as finalized) > + labeltooltip="finalized? to tp_finalize: no (mark as finalized)" + tooltip="finalized? to tp_finalize: no (mark as finalized)" + ] + "finalized?" -> "tp_clear" [ + label=< yes > + labeltooltip="finalized? to tp_clear: yes" + tooltip="finalized? to tp_clear: yes" + ] + "tp_finalize" -> "tp_clear" [ + label=< no refs or
cyclic isolate > + labeltooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + tooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + ] + "tp_finalize" -> "tp_dealloc" [ + arrowtail=empty + dir="back" + href="lifecycle.html#c.PyObject_CallFinalizerFromDealloc" + style=dashed + label=< recommended
call (see
explanation)> + labeltooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + target="_top" + tooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + ] + "tp_finalize" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_finalize to tp_dealloc: no refs" + tooltip="tp_finalize to tp_dealloc: no refs" + ] + "tp_clear" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_clear to tp_dealloc: no refs" + tooltip="tp_clear to tp_dealloc: no refs" + ] + "tp_clear" -> "uncollectable" [ + label=< cyclic
isolate > + labeltooltip="tp_clear to uncollectable: cyclic isolate" + tooltip="tp_clear to uncollectable: cyclic isolate" + ] + "uncollectable" -> "tp_dealloc" [ + style=invis + tooltip="uncollectable to tp_dealloc" + ] + "reachable" -> "uncollectable" [ + label=< cyclic
isolate
(no GC
support) > + labeltooltip="reachable to uncollectable: cyclic isolate (no GC support)" + tooltip="reachable to uncollectable: cyclic isolate (no GC support)" + ] + "reachable" -> "tp_dealloc" [ + label=< no refs> + labeltooltip="reachable to tp_dealloc: no refs" + ] + "tp_dealloc" -> "tp_free" [ + arrowhead=empty + label=< direct call > + labeltooltip="tp_dealloc to tp_free: direct call" + tooltip="tp_dealloc to tp_free: direct call" + ] +} diff --git a/Doc/c-api/lifecycle.dot.css b/Doc/c-api/lifecycle.dot.css new file mode 100644 index 00000000000000..3abf95b74da6ba --- /dev/null +++ b/Doc/c-api/lifecycle.dot.css @@ -0,0 +1,21 @@ +#life_events_graph { + --svg-fgcolor: currentcolor; + --svg-bgcolor: transparent; +} +#life_events_graph a { + color: inherit; +} +#life_events_graph [stroke="black"] { + stroke: var(--svg-fgcolor); +} +#life_events_graph text, +#life_events_graph [fill="black"] { + fill: var(--svg-fgcolor); +} +#life_events_graph [fill="white"] { + fill: var(--svg-bgcolor); +} +#life_events_graph [fill="none"] { + /* On links, setting fill will make the entire shape clickable */ + fill: var(--svg-bgcolor); +} diff --git a/Doc/c-api/lifecycle.dot.pdf b/Doc/c-api/lifecycle.dot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ed5b5039c83e2c4a36d77e713c430c8f14901bed GIT binary patch literal 19328 zcmc({1yo$iw(pHYfZ(o;ORz>83GNo$AvEsp8r+@W5+Jxc1b2eFC3ukFmf-Lp*?XUT z?m6$=d&l>U@n}Y~)-36&S*vT+{H=dMr64NK2xQ_yq^duxxJ6`T1+V~Y4J;7(_yEkZ z#x|x-W&l>UCz}!?003YXH@9*!c6j=01$HtPH8!+0GDhU*M|5;@Fa}#Ax@GifeU2s? zKyRB@pRvDT>HCWN?gMzp9|DqRA&pQB!6_lHk1bbLY3}TY4>NgfiAQl(PAft$J8unJ z4XdFyx~2>Jm8d2KxmC2of z6Tf_QFz`KH^d(#BFqS`YCS!u555tR&rL+Q_T8!JtZP4}1U|0WC>XmBJm*M%)%IdM{ zJWWQbULPOi&)^jiL%xM|pSv33qh+qm^YpGyE*>H9fuCnp9EfVu9e#EtpKklOIe1!E zjG7Ike_8LF4Oy-~R~H!CnLoP@@M@16&vs?D^kqWFIy~qe<0QrtGl-NcYFm!Z(-@Am0d1J^DN4er?3R=&8T^e zS@NDu%ForU!X58n85~7sS`8f=u|L!sID_4Fy zxA}3c#(zoU;K(mWJ${6{uVSe*rOKN+;d9RUrk;V2I+NIkGTk+5tc1Eo?8f1xz@Ae8uWSHBQg9#4?RzS-wA){k@ z#q!SHgdUlst0Fg8DN=AE3{@Hxf%7CSs4=1D!iT!tcD?8{tw}F+rZaSgltEzpi^WLD zmv+1aeJ`Pd0VRnT&JV|GIZ_?I3&?VI85o=N(TO~ZSSA=ISP|NTEcgf;4pzbt-6ram zT6q*Qb|v2WFXY+g9k-#kWSlxO#IyTdZJ8wLM-_*YDActV-*R#FQru`L=H0rOT1F`O z7J{eNRx;v}6?;B3Q__U=w4dS;-l;fX6Y#)gQrqTEP*Y8Skd+qQQxyR^F|~MhPP*GX zh&uw$FKKP{Y5y4j{)5vq&Hps@6@dP*~o8Tgi2Pr>BC5>az46k23iu9Y@~308Q632 zD@cQD58zX+VI$Hkh2uqfa5ei0E`RyzYD5CEaCVfYEz7md%sZy?(|RN0B-cKlAtHx` z6ncPY0A={kS%jJW4mb%3L)nPZx2cA6UvX; zi3n&*4&b{!8&DUBe3Kph=ArfRWV*B^ovXtOFMFlExs7A6^s02c-*mix$M;$^+vtZ> z@VYtGNMS$T?ZNi#`ZO~?_Yww)2^qiXHa%5@j<2#Nbkb1&gA<&a_6GxI2pTDIm2C8E z2SK3dHV1H%9D;Bv-0V{fFg)#D(=Jg4>cZD+=|a0AMIs$=)@ok)x#k3%ecR;)s^n6w z6S6Xg%><*OrcmLlP{E{-2-YHKSy0JTm<;a>i>whzdx`>#wULjSEGSAsr66oY#-S+q z-AF);J4ya!BBT0QJU7*O_KCFzaMBI}&fXMbYYSmCM-|e(r6qdLolp_IGl?d8Yy#V) z#KKBe&4u*EuFgKQn}5R|3@}!(A36okggW22?_O5p9m$8gF}TIC{jiK38b$MSPJ(%l z1l&%sFk&51Tr=Ak7~Dpht*%dVGpDPZU9biW!%5xuGCPndm3V_F%_I$c!iyYz_#L^F zI@)&}@u9ApYxFtFc(nuyf$CaW*he@W zBF7qE6+^@RJ9vk{*HZq+(1WHbNPfsNtmor({Q4cp z00AuJTc|UoWqqi4?o%(Xx~u{~vMO?DKFn1Ca|eo%HCsqmacyf9 zddyCIUPFz#v$=?`$O;i6UzWAa4&C*5B6GiJLTqqir(J5Kr4DEiqRQ$SBm}mqh`jEyvDB%BG1{fI-7iY+Z799Cq^Q4wP-2S_w7{)> z5vgv|53xa&>a=9BIVg50Kr`@eOH23TjNZ*iYb>Tm>~_}I6^`xHq`pfw;vBJNb@K!C zCSOe*+U7*_aT|fg%+V2e-i}1kB%~>$DG8mS19dvXOzD+OYbH@VdqG8w-o`q~;PV9r zb*KZxlR0V#UdYqI#gB0mV_Ts?*2KN;7`#ofShtpwIEQ+T-+_%LFlPRIQwO^55??vR zGVBIxI&n)ScvD!jbZg1dTagY|I9d{g&>5KnPkFFu!bjd#=sptFS~nu%f$2SAW26Z> z$b~d+~Z9f;Kpt9D)--wsnkQjY$m# zv=m$<4tAk$IJNGOiMQqKw*4R9YEyEE{ap7~sf+cj$xzxc@*HX)?os;QMALr;5cjc) zFpgqNPKFG|nc$=!eN;NW(0I=*o`}UP(Vo%`BF&jyqkeICz!(7}6avrNo@%hXoulI4_I<+Wq#W|0A zq1`gWbwxk&egbj0ch?bnnoK16yH~BZSs>S)VTTQxamFi}dojJ-o-em*``Ead%sucq+IysTa%?fOM;f>{VidOmazQ}2_Li}j2TSD=(Oq^i-?&5 zl_?rm1jAZ=hC77|z_FMONVvoW^Gz?BKJ#znXF!wkvjo+nX3MQxQPc{$?)6+Q4Vg@B z4^vYQT1VCC7{nRYfIw)aBHwYn-f5t3e}9E?^I@1Q5;j?oTnlarxiwuZbSoyzUGuo6 zmPd!)Bi)N;crW6WnpTKb08zEIX#zV%ORvctbkvRjqFHU**p%PR;Ms~_-CE{e09A^P z59A`$&G*^**@G1=123U5*0$-9JS>wjvuOsl4{lQyYjt7?jTqR;WGWkFfa_(-t|JZM zv%5?ucmxpZRBihwh^VKl%2JfG;zx5k%bCC@M1a=@A*fI4Rk?EO4vL3rw^GH4r=|GG zt)Axy$jlrU4kh1dM@cK`>o^R`N8S5=(vy z#@N_dK+idT*@1tBZNK6)F*hd(Wv8bY4Zz3uY1|Wc{m=e}aKR8>E{we0Xb zbR6Rkas0`j(Q`S#J<;`sA4XAVhQdEQEfo|-5bQ6CW1gCd0q$ADMN1MhQ2CZ29*!>{-jdQa`VC+~TwE?Q2Pl zKovM>#0a@D#H?4>>Ua>rTfNRs)_r#;L29q-wxUng%M7^{*u8a$!;E(H=#QpN620Vo z3|)%E+}609KWjg-mP-|Zn49!woJO<@XrzM9iI)yTGlWmOs-|)`Xy|$vm#)Ah?7jy* ze1?nea5+HBmCKjsHfcDV`X=L@1M^VD7Amu5){Ri99~ws_ng}hrBzh%w#6qcAxt}Nr zfZSPMB8(@kDDFKHd6cBZ zsI~$_#eZU)(onPtshQLXVJe!ZS4(lG$$^qTE|jBeIW4WP)b)hX(NRcxxxQIiKN97j zqVg-TDb#j`iOe6{ElM$(%{)*G68@CQtkZP%?K+{w4W77PZ^N3y`(+ICO3h|dth&fR z-aX?TlYO*?FcyY*d@DE-BwSJ!O>Y=cA5G%lU^i=%g%Mg3^m$$SjjMq{WkXs;MMaj) zR&K$3o_ML$*eN^RWCcA`NdbjnZXRVJ1%S>TOSj(FS4X$&wDR!`3?^=?Yd2UfklUx9 zQ@1)~wi+tfS+UsWiOo!yG-E(-Srv3L&qw z4!JIp?ja3!AM#OOYkohET4>&gnhz>yHy55he{<-WXWN$2$j1E0w}&bDIjXXkXl6&W zm9-5>2~nhDUXlcrrcg=h&D=E9(O1bt6v)zt1&AwSX<=0rsc5{dvY7uVD}?zeG2-f{ zPF-OU-BQ}kB*9_blo_rfw!c2X2CbGUnxjVt&y|V1<*9cF&dL5AgR?G=<17t-TaG~^ zIYS2XAukWL@oE*AJ?KUm>BpG>(c*)LVOucS#uU>wI+AKzstF{_A|>>;iKbC*7*G+B z9q%QBf{}Qh5CTk>zb&<7LfFah+UW*vf0M8YX7uXzVL z+c{|a{XWhv0?SY-U99wGNYZ7cDjJ8ZFh=7j9CFhgirY!X6kzHimHNNn|OV_ zsN$ZZhDVA+3ri3miaTQg;}J4Pnwp=fxM#JhG{saA2sb`CZc-w&PJ^spWJkNf7)Cj?Sf>(tz z7A=`-&;%eS6inaJGUd$Dy4Qf?sT)iB5+k6z#^|$MSiIw%!q>~5HazwclCqE%50Cn) zNhk@quC~TPO1B2SxyXZ+ zosprX>gL~;)FjYH1gCzs8HQ+a^ok{+yg$v+k^R6mpUKB+G6B(uP-1`^UczZxQL@t7 zeA7MqYE`Gn0k$3FS&&y-yE(Gx@$ll2LSX#Oqy~W$g;~ZJYSDavM}Sl-SFh6d`I6Y7 zwYhin3MI0P3b-4#jcDc$p;L~a(*lPTN5y6@V3#8s!E$IN+fm11;BM;A4Xc%5-r|sO zSDWbE>#uSN8Iy?%+dSCT3acE5!vS`pwIb#850E1MonY#R@pI^eJ@SE(3 zJC?3}#ZY1FSuvfNeCk-3w2zcUdK>u(Sd}~xBe}JSnkBE3e~O%7zJ&|lGMqO$v=NRL z(8xO7rPUzjx~RqUF6?a`Q#Dp>{Bf%cxuhJ)M;S-8LiWyy0?|)JUkg%l0>c3%ZVDD) zEo&gjE;~Y=3U>jUY4r)?Hcb4Qdm2!SjKOYFO%Uow&_x?Adx8Mf-{2cBg>>K_z@8w_ zKS=t3bD9d>Qpxp!k$4vg!+fX22uUo#h{3&pfW_GDbyB_tXtX?Di7d4;RQZRk9=T#vMOHUJk~ zzN}M(-B=btPN}-^R{i%&MqR6dva)BjslGnxyV5gvjFS~&9O89Y^7Xu8bvW~B#v31g z-hKI=&^l76OReWC<1n-AZtEjw%U3r)a?!Tfu{xkwNzNoLFn${B=hBY{eR?4!5n?Ms z0)*0ws|ih{v%GqVX6X9n@=Z(G@ev+HezV>VEuP^>pm~3Dtf3B+Ow=oIl;YSAS6+^u z_~P4Fp@*-qhdk4R&f0f+`*=OaXOE!9%N>`Z_veo|UW9gzddg+i2em`oM42G2Si_BN zi!xGvvemUHN@MS=C)ip`nn+g(Eb8>ie$z6ebG6+=^`*&LL`=-sSga4u443+TK{xYt zGC8r*`{=CUQPZ(3-sMSdyHdz|l?F7$lrUo$Ex2Ckp~07Fa=4QNyO)z`bulnDw zNjo39NGvzyEOVb~q{yA0cz*A6^}Wk-*uNFiS~BBWmL>uYoHRUPOjpCFN~$G>sF%eBWQu;JZ>0f>okG8>Eo=EH&q6rDPaN&7nl5%eejBHmrdk{60m(H4X{1 zV{ip&%%!DPS(y^s)@{X^Fxtz3YoOhhHGi=aFU;?68G1VnRjAzenFX>VF_djkt%g_v zMc4>I6Yg`7H$EMB_MUk$$d;irSS#bLL;&@RP07~}3#Fo#m4LKiKtQB8@%jOI@dwXI z8v|OZR~qnSC^h2~zIik~y3%=TpXtpmR#G>4c*co&?|eg<`qnHcYYFbvTD-VyMuyNu z7F)$}<`75>^LE4dA7l8lVu<;@)uM+bpxM~q3PwkUJNpla$}BKA4LwbQALOQ&2Kla% z-_P$3F*{f*af%h%C?$RD->YV3xbS`~rIA=MlxdMAkNPz1>t6nP%f#JVgw&X^UnLh9 zGVwSpG5oTRPv`cflXuTccuRb<$M4mf(O6*mftg%)$^wxTX}CS?6ap4&BC}PBhUt6_ z9Q{2!Gv10Hq$##=1!^jM@Y7nd$rC5#l8(BdF9U&9T`2@7B?0+$_~nk^r6_7cQp}n( z^U|M>h|AI~c7*uiM-u&*vB!Jt{6>z&{qY+_|S#c)qOcSh| z1)tnz@CMoIQJ3%bfxrtF^|5aqBcy|>TMXVy8)@0hfGfmiHOl4qAdAlY;KDdYa{Fcn5N(Uzl31 zpO0r8IHF{$rZ1YxU)5oaeH4aABrdi~>nVO?KfmD2NBj-D4E?lDXqh(cqr z?v@`yymLYpVwi82*aJ_rvSVkIPRcv7juyg+a%^wbcdbQiHvz?X%vpyOYpp1 zYb0`Ubn8KB0kyVI59z%L%v8G?MR|X3!g8RnUaxMrXqkUv!El$>i^W|#6k^*2Yt;(km3HC zj373@wqip2ttwCiGUTogogur&Ah)Uyy((0MB*YWMkYQ_rR357h;Yu0=OUq!_>BaUz zVh5zp6w(5^BWF*!Cr9758{B3|@5#((iRd9lN%_3)LNIKz!;*(zfEsMBDP*-|Hkf8) zswu?6!4@jF#R8j82g`z=|9LbP9|nIR7DzyyUuqRkOnnVgIZs*ermorvd&(P1$UJv` zhM%?Tc$o(@0FXk6QeXH12tzC6sFhAJ|o(IBVGt~WDA@*Q9nws3JM*9oit=5 zK6-b*Y{(>$cpuLKEz0Lst@`I6#weM||S;!~9?I$1kYy2mWAZWn=pbfIL6df42YaNk1F=e*_>; zVCSzV{kn1EHUTWi{m1Tzq%9(1A6^w<2&a8Q3b~(LAOXk`;g|=~PC(msfGT**;ZK;~ z@@rS?X58+qv|xu{g7zWRtH#nphrOZ**8TI&bIUqGZ7cHEl{DC*aq%J6n)Z8bnPOjr zPve-9Ic%ytSa zO=|w75xUTZ{t-G#epb@Yt5qFjZANXBb0cXf`!mgBe%FFt2KL1TMDMz%f99N!O_n?)Y{7@l9qMuw(6JyZM=1ii&d1w{BMTj+0B0p z3ELA#`h%9SJ;Sy?N8~pxdxmnN#*T&#=5|iD4!?QWlSv+I{e;pqG?m5G#b1dUTY%M^ zmBBWSa<(?Mf8`MeTbo{bZ0iQl1~77PfB=ka zTmfe1hz6a6tXe3G6t~xvIsdE8b2XJZdTB*)8}SBpEI(u{RVd; zU^_`;b5k>?U)B7|qwHjCtp))7F8_QY`RB=VN1wzc#;kx}U{2}p5b@u;<#&b722Q`Q zqKbpF@w3Z*S%txl#=mgpe{#_?ViYk0JN)ScS@0j{K-PbFIS!ok7?dIp{Ux0Aow|NY@}AO4o| zFQ*;1xaOT|R zTTF}$X@q5M&9|zz=EK>{7`gq@4QjJxC0`x$oiTpC?Y0$HQ%~d_wdLdULz4rJ8LMrI z+6AuS5!rUz>o{9)4*iIsd)o!9po!veju?L7-X#<|_ipoj{no4fKI(O4qT_+;oQLZ0 zo2^o;OS=QAKt=#m@i%J&DqjC7`WKVthIIva*0TAMBn$!TsQl~JLu$;*O%4M#8WPf@{qmA)%JWAZzD*OO}XN?dfl z!F3!*3rD7G$U+l`1^Pa~AY!`IPdq`grQRc-d5hVjJ!? zM^+=aO#Hq<{9eKB0J3`aemTTfPu6;7t$!0h<<;P(?i7p0iZPie5^iMjF?}TXAC+GvGLf62`8(7adCac*RaGn#+9 zX^8ucfg{@2XgMPT`lyh2+QXdZ@$9ek|mRC^&z``C_m*7#mQF{9I*DR$+!jB8t~Gt5gS?q}O&>NP<7 z>Tntjrt%9_ z3-a2$gud+Ew;Az#I;am~NWn5fHB38v9$KRW@x;|V$dHh9mPe|Q)NSv8by4!~rl65>4Yd>bCj18bVXw0W6MwhenP+J?m z&GNWb*^Y}fnZ7&=vhSo%Kor@y`F2NQ1SF=my%cS4>}OjkE9WIY#Z{9|OPOUHFia-9 zZmYFSQj09j0F_?W$k?fFMMZo*<_oF-t`Ua7jeiRP+(#F&Pd$FCl3oJv8b%yMB01DS zkC~k)wH2%y-j4TDip)PqxbjI0T=_M&^Y<=8ev_Y5z6{Vfp?K~3qn5hVJcd+>%|%rv z#2%ldw^U$iDrMDKYffoS`Df2;XwK<%?+2z}NH!!wc9EWU+rlR+GIKJ zQA*^*I<={6#wx~=9WTA8vSBPRBa<`9UPrp3pLKxocq2yya^Y+o(T}^7&peQI2*V!k zU=MBH=R6){A@=m2&nTE11e=ZrRo5_&M@D%gW;lZmt*h2vW3yN$@6;zH?`cdVwgaOJTZp$`~}h`>+#JRnxSvyqXu=h zeZ?Xq)v&tCj*4sw8EN-e_awKydrk2RYuHq7@sO|Y8NaHo&X98TBG$E`aT5&sL4&=i zX&-5WC}z9_H@L2E2O?VU=?=294nTY{OnkRlZTf3GV4FxFd5}m^c0Nk%5W7OA-aZTbg_5 zx|Lp~7;Sc@XgH@qmUOuPOU`+%)h4tGl9lFOf1^peZ;#o=C4VFP;&>Y_N2~pzA+gj} z@sFUOJnNv5g!sEJwV7`Oc-6=6H_@*IWO2o+#@h zcDn3p=J^L5hhbTOG%!|8z9A7@ z0|O@sk4cAy^Vo{Bk9t)@3=b0Sgc;kUaFuD=G+PGy=1+02(A&p?JY_|0OGLOi0(+7E0yB%3YcNCx1LSvj_FU&jGqc;bHZR{^!LE<`aG%&9 zP6i>EXqzD*^nT>EA6*~xQM!Z-?|Hj{i_^U8!IPad^`kSaS(yE z@L2uH+Dn)pC(E5cR!wsG4?aK~Ao7CFl5?t~581p=9M{(}rnT(7?*sLo*OndMXxthp zVtj;l>J%D!$by|Sw@sTuI=bIPlE}@sc&=u0Mzu@gPKjOM8rEKedl~mydfLUl4*GqC zmU~d$s{o69o`XnqW5rC8{+N3`mF`yy1#e8;v;`|@%z0A!4LJRdXFWlxm_KF zqoaKHtBulk=P|T}mCe`W9x3n+=@BIRF=>L|M)+exg~=Ke-8=;SKl8}CnM^o*im%A` z3>@xe6lo=!W$8arlC^r9o#B|^!jqY_e!J~=$>WvHaq&<{8jjbvE94PM(m%zoz+~ti zabvY6vEL)vySjWF62m@$LTVOr&(%)sm1yRTxDj)=7d8@!=qnORI!VrKLB2r?p@|_` zlrgQJgd}8sC!Jzn5^!*#j1ltGyT^ihs=Fi{ik1?3# zI-UJ>Xjd{9+JkR3PM2w#+A#(WQwl#HAB)6P5(ScgVbMR}dG@t4FNNXuhm&7isIRKq zCfI*Wa2oWU6Irrbc5;9ohWYAE|DBr4{1vxwWr9gfCPP9@il`EHOLR@sAi2x3`ceEf zzmfY`Uxi_!1r5_oEupKxl)xLzT_@TAX)S*O#5(9RdlzW?^;Hq$+(mUF{`(tev_R@! z(uw2?g!3SnNm*VriT>TLd)`4%Vj~9C=m7oTS6_6`BW?>XRcRMzWgm`J0DN8~T3iJ(y>-7pYHh(5r``frsWKw z(4K!hZsvHW8_IUnG5RiO@1P^rOdxytYr8LpW!1fl^l{1ubDu^#XUnBR)1l`Q54d#XhTUq{aAZF=g4@{`(h@_LzbRISjo>h&hg2nu^ft#WsCBg% z(XegeOC(5CFVpGuar8)~)V#dq`Yby+cbN}*{}totNBO|z7eLwwE?2BM+VW*qw^Tg# z9qoW;7A7+z@}HN^e<#aI}4Cm!OY>5y~WJ!R0{*j ze($}(C-X^>y=e=$)}o&iFVib)szkgkiP?J0ztRZ9x|1SbJT{aPIXHV8YydHKi$3|P zZo&4EB4n7S$q5*(>(EWr;s!)nLH6fjN?9b7Z_*Dw&p+{CbAlIChj7YD&?tl*lUl3q zd_^?oaD^Q*O(bs*`kp!>zXCsQbVU_{IeNkxlI*|kCIR4O5|RMi1;9pgulLb{4E%7j zTx)Pae8+)1Oa64F{CWNSD5W z7s17ouKEqSYq9>kyS=F{+m}UH-Z}UxVyy)*Zlj^?E#5*yeT4O9m9q@pmnB$j?Ow)o zIV%}DtLlh!#fnktg*wGyPO&a(+f{eZ>?TDdLNt#6%q+>EIfW}t!ZOr%IE4xIb{gt= z;C*s^YeyDsOTTS>jp>gjmQE8NgJ-R)q>)~!R&CfrYul5Oyyf?QZS!u6;sd&XMWnwm z5y|F+0%Vy z^UFRVm1y^tNMn}L2hq1^nENB@sMwQc$ZM2^h!G8OCo;;K7!%=8_ducK6K&&mwwk02 zgQKCPpP8%+?#Kp9&x>g)w_d0|F$=Y?$YK_>BlU0Ig+S|5Y=RMT0<+WLL|08Vgb zp@TVt!5)r)7!W0PYyxkGFQh095|kTjlCfwZH2?;`#l|sJBcps*BbG_-v`_?=qA-|0 zjO0SeJYIji7O{{9$v6v2*JR}fiVV2}xuyH!f5d(zr@rRZR3eO&PFyI0k`$%FppvYM zK+V}jkI{0 z$dg(o2=G1bK!UE^dbDM7@nXOblQ;o5kRW)Vt1485(+W;>tpdFmZHx?As0S2iA;>Hx zdkiO4Xc!L@uRHS{05c!th&tl z+A+Ty()~;4IH$RPL*bsuz#kNjorRtAFAVp0(b}IB;U5~c|85JEZv3!K4**T<(hGLj z34O7jjxyf3Sei@kBInaR1hR-laX$ht(xW#%wRAqy0XOm`N>3ihbv%<)U9WXu@uJFs z%YzV{S~&?py2k3E_QFP@c18T!mYP^2Zsfk9jKz6nw9MS?G>-a-jCx%f?Ld;??XtOV7z|7W0HW{W4STeDl*`!r`0KX4p0Im{%5` zM{trpyc9iT?o6w(a^KV$?)>8GxFMO;m1;|@Nui*XL{}wF%vuLC)`*Vk>4aF_{~+_uWA#v4^?t`}z9B#zE-QcLZ_- znN8HTQF?Sz6pA9AicPpeuMrM};>2^r6~>`F`HP;AYXJ$$LIAlfqT5)H{~MV4-}vZ%3sip%(fr$&k6!Q3qYS=T$~(#R``d38^i>9&hQMQ|F98bOZyOsNCGbT4Dh>HWj#`TmJ$OQyFPe`2X>`W}|tlXSW zT?BGJwS}GaskhH`m4k_!gNx+}oBz$u%?@IE&hb?AZw3F3U;oh#763aJh>3-ji;bNf zzyV}q0)p6{8^z87WCC$;us?m`-|FDtWM|@L|J4^300$=<6E`P2=(*rO>>R9|e-`{# zX8YWiztes8f1|@?|DWq{S=qS%nc0r2SSzT^AEaN_#-lOhkGUHLW$39F1HL1~$-fpzA)1?ctR#y0B*7 zt(Uf~=lD(+vkg*?Prv5<;j+Zi!^x7iZLLq(B`+~=BYtz^+zf&rO2aVk!n$OwNzLnS z7kP>Chn|Lm@8gRm$l$}3HbcVf4#V4dFY2FGykB2w&zfa;cTcytnjZU6Bz08Z=cCn$ z73Gd1f9Cu&a|NKadk2K@>)Cg?VPJU zyRNc8>uH4v%G<}A2-F$@PXYVn4hG^)-Evp{G`7pE_yZkIfyWE6&B<{2NCBnQmIiP8 zS%7O@nD+=5IlswuU_D2!@&WolAHqJW*xM0bWv#sKEKjV_dr{FuzA@#z+{6>Ck}K}b zEAF4M=U)~`(7za-(#iH4;U(;m<>bcI_iOZphUyn5rerKum&Ek3+KWVrJEvihjcGIU z8yABwW({<{D(hh-zi%N2!1eNf2FFz_>@qoi$NKW!M)rY{bAU&}JJTuI)fz=?%no+Ko(adBZTh^gG)so@ zrxJav+h6)+)lb#(?oK6u{-P!+;#_@uO|wa&eHgfCgrFt(4pu8E+;Z}FFC(j>y`c>< z&*)sy)0#}kUZ|o6k(g@ym?H$=7n_Vrq}PNaK8{|1Hu=Dlcnzedq6LM7>03HWSOxB2 zw{r0JQJ<4+f7Eb9LW1u+ag`NhjE%Z=YEvz01FEFV;sGH&2f-jp?n`rIC5MV3pc z`eLWf?UqN}DtBm}jh~m^RK7a2EWdOQ-{sBi#kk_C7?8#7ez>jNdwgKK)dC|lUd`uA zw*!|z-WS!nc=w#9H^ry9g2;Un8Nx9>BCKRLREYSB0XO%ka$alwnP0e_20Dp#SH`-S z51X9Xl5OytQX|4}?#SPr!bDQ7^*cw|T-oW0^oC?Vb#;S=dWGvWE39<=y^}NI`unTu zFjwfSxG-0uCe}1bb@oW=4Qo;#v$b+$ouBKYT*w~Ny@YUjsNB2G%;G3t%z7h8+1EH| z+TTXX(6H{thpC5TURdx1XEuSpeTdLQwqJ>b7WvfL8HCHOgD?~eO{VKJ9&yh6e##xQ z68Y12Y-YIeMe~WMuF%~5)3dLdgqgLQI-=+vZG4tJi!aT@EmrV>!MU~ zDugB94(u9Z-WO^W`6Ay7x$k{Umakt+WO~C|nV^>eI?Bbw2cf5CO%!g~<3`)>8^Y<) z_eIp6ETctJ1I+NS*iVHU6Z9l|&LQDnAB%MQ?cBuga&z6ppI9MC=zy$xaso(r*nAQAT=vn3l5rys3YtElgV$zNYp#+@0Mzg2?)=DL?L%W6J%h1*7;&*wKdrY0+dR z-e%(+EEngz2Tm9<<>2Nl-G)N?X%dQn?`RULDzqxQ(qPIs6$DnL ze~^`(fvGa;rlM@A^W^IlbqOrgn~EEdURh++bw{P`!IaqNYQ+@p!R($+V!Z_UO$hkW?Nj_DvidIDY1{S@P*yliou_{G-#PEO`+LaHvXoyJc^j3n9 zsEQwQ1e332U#|+TbSn*&_6t)tRADuHeM{3&cv1QKeJkVOXZeB*Y9mKY7uFhQ?$vOH zH}uB!oeAH`-_Y}6vL>V(e~QIV#OE~HWqnO}9?!xB?>HrGE{{2d$5D<|Ol{&gy-01s z9hYmKI0ZX;NG%soAk#?rIU|`54)<;4a9{$e>C5;OUnK;TXl~gB{LU=I7A8zN%Br{z z%t!uFj#iMpgjM~`gC%5Rut?W~7!dZv*8qH396upd!bEBWOleVI^h;XU$3~gE$`^O@ zeUEdG5H{u%zk-Z^-qQ2yX7|Se7Rbr{*9H~#f1}TMy3hWn4J!Yn&xjqj=y}#>_#hI0 z5wV|kM2I|!?|GQ8LS0*MFflXA1BJBS8*w?Y_%Tv?3!H1-EOq?EV%>cc?b3}CC8U1@(C{8>Cih81UVBCzDO zjMIcim^@jt?|CLc+dWWcR!%V{KHTFNx5Depe5hh%qtGUofDQ+0-BFXSKzQ~XNjc7L z@L$7#U!M8Hi>$1FiCozKy>{RqkM;jr-v5?VsAwpNi2i>H1K9tn@lQp?|NAK5*Ba}e z7F~aTQ2vkT=%1qi5h>BS0 zXZvrd%Kxt?`UC@Njs=1|g{hF=^9@i4 zX*415Kk5tX?}IQAv2guf1eEvH6VZ~LkU(l%ruJ831)CGS%1?#Ke-920a9GI7?D}>C z#s{;?*=~kIc7BuNf7^8o@asY5XP**lX0kv#SgiHyJI|%v-#(kYDX{t2=zc0uIy1{t zT4;_<$Q$pYS$J-qip#;s6e%NoGNx(QPqUlL8unr+_qauPf)-hhAji8-b7ap&DESF~o9wqN<{{8p5~Uq&UZ7xS52kK$JFy6mka1Pv z`);6U5(4+}XJ3r;#|!EY+B%9&W?8hQtuvl5*lAabY3$v3SBjK*_)uFnX7A3(D^W)! zj?e7*(U-12bmGNPo6z zKF}WtUQL|4@y%F(a3v^M)VR_RT7i5xDR124n9h#j;SDd1k{hJZNHXnN5&42U zH-Hf0uJ!ZF_8y8M9AG)h%fVyFKn`jRDQyyDdaaU(d=+ZYX_e^DCl)v*=~niQ_y7lqeB$D7kiArh9RuE(lmxd zsgUa3?ajH5cMSievj@?Vm=fu*+3I`!8>Xay_ENG7UBsaFm>@&L&^}|s??B`mpU#ah zKLjG*N+Uh=k+0N_ZI1G$`rF`Mt*!}J+C`mmzG|tnm=2-r6cM`Iqp>w}{X(RgHet3| z{4$>@{1e2UK_OB)lR?8NYx&SMPXu;`k-6S`0EXWx5Y$|{zqR+>XsW1D^;7F`t*Vk* zV`1UpWKlRB)vU3W1gP1Ip4*0uYGA_zgeqWvPO(#HxVl>eE8sMc`w&@YzpIuvSd%<|{rgyl&t_=i-Hi7Jq zp=50GYp(kPnF2X}W2%40OP{B4WpfW>00-NzB2N?d(;i{~>+k7M$=3F1^Y8P-_j^uz zzKsCbe-qfhFL9{Gxlx3`ctPvOTyN(2*o8x>@Gvrqn?0+LkUlg(z3+u86B5gtms)ri z{-Jq{a5SNjm)isfhRiZW2zzP`08MrRK-LTuM9GZ}!NUTeh)D{*h#%uk;~sutJFcZW zyAswVBKww{H>4%Sy13D7WsO1$Nh4xR)2Sm@9qS%ir`L{7{MAC}>5B@3vpZ4-5kz_g p$M!7V4%?s5Zqr0H|LRLeC$NK)o5PcrfFN#e5F! + + + + + + +Life Events + + + + +tp_new + + +tp_new + + + + + +start->tp_new + + + + + + +    type call   + + + + + +tp_alloc + + +tp_alloc + + + + + +tp_new->tp_alloc + + + + + + +  direct call   + + + + + +tp_init + + +tp_init + + + + + +tp_new->tp_init + + + + + + + + +reachable + +reachable + + + +tp_init->reachable + + + + + + + + +reachable->tp_init + + + + + + + + +tp_traverse + + +tp_traverse + + + + + +reachable->tp_traverse + + + + + + +  not in a   +  cyclic   +  isolate   + + + + + +reachable->tp_traverse + + + + + + +  periodic   +  cyclic isolate    +  detection   + + + + + +finalized? + + +marked as +finalized? + + + + + +reachable->finalized? + + + + + + +  no refs   + + + + + +tp_finalize + + +tp_finalize + + + + + +reachable->tp_finalize + + + + + + +  resurrected   +  (maybe remove   +  finalized mark)   + + + + + +uncollectable + + +uncollectable +(leaked) + + + + + +reachable->uncollectable + + + + + + +  cyclic   +  isolate   +  (no GC   +  support)   + + + + + +tp_dealloc + + +tp_dealloc + + + + + +reachable->tp_dealloc + + + +  no refs + + + + + +tp_traverse->finalized? + + + + + + +  cyclic   +  isolate   + + + + + +finalized?->tp_finalize + + + + + + +  no (mark   +  as finalized)   + + + + + +tp_clear + + +tp_clear + + + + + +finalized?->tp_clear + + + + + + +  yes   + + + + + +tp_finalize->tp_clear + + + + + + +  no refs or    +  cyclic isolate   + + + + + +tp_finalize->tp_dealloc + + + + + + +  recommended +  call (see +  explanation) + + + + + +tp_finalize->tp_dealloc + + + + + + +   no refs   + + + + + +tp_clear->uncollectable + + + + + + +  cyclic   +  isolate   + + + + + +tp_clear->tp_dealloc + + + + + + +  no refs   + + + + + + +tp_free + + +tp_free + + + + + +tp_dealloc->tp_free + + + + + + +    direct call   + + + + + diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst new file mode 100644 index 00000000000000..0e2ffc096caba7 --- /dev/null +++ b/Doc/c-api/lifecycle.rst @@ -0,0 +1,273 @@ +.. highlight:: c + +.. _life-cycle: + +Object Life Cycle +================= + +This section explains how a type's slots relate to each other throughout the +life of an object. It is not intended to be a complete canonical reference for +the slots; instead, refer to the slot-specific documentation in +:ref:`type-structs` for details about a particular slot. + + +Life Events +----------- + +The figure below illustrates the order of events that can occur throughout an +object's life. An arrow from *A* to *B* indicates that event *B* can occur +after event *A* has occurred, with the arrow's label indicating the condition +that must be true for *B* to occur after *A*. + +.. only:: html and not epub + + .. raw:: html + + + + .. raw:: html + :file: lifecycle.dot.svg + + .. raw:: html + + + +.. only:: epub or not (html or latex) + + .. image:: lifecycle.dot.svg + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail + below. + +.. only:: latex + + .. image:: lifecycle.dot.pdf + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail + below. + +.. container:: + :name: life-events-graph-description + + Explanation: + + * When a new object is constructed by calling its type: + + #. :c:member:`~PyTypeObject.tp_new` is called to create a new object. + #. :c:member:`~PyTypeObject.tp_alloc` is directly called by + :c:member:`~PyTypeObject.tp_new` to allocate the memory for the new + object. + #. :c:member:`~PyTypeObject.tp_init` initializes the newly created object. + :c:member:`!tp_init` can be called again to re-initialize an object, if + desired. The :c:member:`!tp_init` call can also be skipped entirely, + for example by Python code calling :py:meth:`~object.__new__`. + + * After :c:member:`!tp_init` completes, the object is ready to use. + * Some time after the last reference to an object is removed: + + #. If an object is not marked as *finalized*, it might be finalized by + marking it as *finalized* and calling its + :c:member:`~PyTypeObject.tp_finalize` function. Python does + *not* finalize an object when the last reference to it is deleted; use + :c:func:`PyObject_CallFinalizerFromDealloc` to ensure that + :c:member:`~PyTypeObject.tp_finalize` is always called. + #. If the object is marked as finalized, + :c:member:`~PyTypeObject.tp_clear` might be called by the garbage collector + to clear references held by the object. It is *not* called when the + object's reference count reaches zero. + #. :c:member:`~PyTypeObject.tp_dealloc` is called to destroy the object. + To avoid code duplication, :c:member:`~PyTypeObject.tp_dealloc` typically + calls into :c:member:`~PyTypeObject.tp_clear` to free up the object's + references. + #. When :c:member:`~PyTypeObject.tp_dealloc` finishes object destruction, + it directly calls :c:member:`~PyTypeObject.tp_free` (usually set to + :c:func:`PyObject_Free` or :c:func:`PyObject_GC_Del` automatically as + appropriate for the type) to deallocate the memory. + + * The :c:member:`~PyTypeObject.tp_finalize` function is permitted to add a + reference to the object if desired. If it does, the object is + *resurrected*, preventing its pending destruction. (Only + :c:member:`!tp_finalize` is allowed to resurrect an object; + :c:member:`~PyTypeObject.tp_clear` and + :c:member:`~PyTypeObject.tp_dealloc` cannot without calling into + :c:member:`!tp_finalize`.) Resurrecting an object may + or may not cause the object's *finalized* mark to be removed. Currently, + Python does not remove the *finalized* mark from a resurrected object if + it supports garbage collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set) but does remove the mark if the object does not support + garbage collection; either or both of these behaviors may change in the + future. + * :c:member:`~PyTypeObject.tp_dealloc` can optionally call + :c:member:`~PyTypeObject.tp_finalize` via + :c:func:`PyObject_CallFinalizerFromDealloc` if it wishes to reuse that + code to help with object destruction. This is recommended because it + guarantees that :c:member:`!tp_finalize` is always called before + destruction. See the :c:member:`~PyTypeObject.tp_dealloc` documentation + for example code. + * If the object is a member of a :term:`cyclic isolate` and either + :c:member:`~PyTypeObject.tp_clear` fails to break the reference cycle or + the cyclic isolate is not detected (perhaps :func:`gc.disable` was called, + or the :c:macro:`Py_TPFLAGS_HAVE_GC` flag was erroneously omitted in one + of the involved types), the objects remain indefinitely uncollectable + (they "leak"). See :data:`gc.garbage`. + + If the object is marked as supporting garbage collection (the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`), the following events are also possible: + + * The garbage collector occasionally calls + :c:member:`~PyTypeObject.tp_traverse` to identify :term:`cyclic isolates + `. + * When the garbage collector discovers a :term:`cyclic isolate`, it + finalizes one of the objects in the group by marking it as *finalized* and + calling its :c:member:`~PyTypeObject.tp_finalize` function, if it has one. + This repeats until the cyclic isolate doesn't exist or all of the objects + have been finalized. + * :c:member:`~PyTypeObject.tp_finalize` is permitted to resurrect the object + by adding a reference from outside the :term:`cyclic isolate`. The new + reference causes the group of objects to no longer form a cyclic isolate + (the reference cycle may still exist, but if it does the objects are no + longer isolated). + * When the garbage collector discovers a :term:`cyclic isolate` and all of + the objects in the group have already been marked as *finalized*, the + garbage collector clears one or more of the uncleared objects in the group + (possibly concurrently) by calling each's + :c:member:`~PyTypeObject.tp_clear` function. This repeats as long as the + cyclic isolate still exists and not all of the objects have been cleared. + + +Cyclic Isolate Destruction +-------------------------- + +Listed below are the stages of life of a hypothetical :term:`cyclic isolate` +that continues to exist after each member object is finalized or cleared. It +is a memory leak if a cyclic isolate progresses through all of these stages; it should +vanish once all objects are cleared, if not sooner. A cyclic isolate can +vanish either because the reference cycle is broken or because the objects are +no longer isolated due to finalizer resurrection (see +:c:member:`~PyTypeObject.tp_finalize`). + +0. **Reachable** (not yet a cyclic isolate): All objects are in their normal, + reachable state. A reference cycle could exist, but an external reference + means the objects are not yet isolated. +#. **Unreachable but consistent:** The final reference from outside the cyclic + group of objects has been removed, causing the objects to become isolated + (thus a cyclic isolate is born). None of the group's objects have been + finalized or cleared yet. The cyclic isolate remains at this stage until + some future run of the garbage collector (not necessarily the next run + because the next run might not scan every object). +#. **Mix of finalized and not finalized:** Objects in a cyclic isolate are + finalized one at a time, which means that there is a period of time when the + cyclic isolate is composed of a mix of finalized and non-finalized objects. + Finalization order is unspecified, so it can appear random. A finalized + object must behave in a sane manner when non-finalized objects interact with + it, and a non-finalized object must be able to tolerate the finalization of + an arbitrary subset of its referents. +#. **All finalized:** All objects in a cyclic isolate are finalized before any + of them are cleared. +#. **Mix of finalized and cleared:** The objects can be cleared serially or + concurrently (but with the :term:`GIL` held); either way, some will finish + before others. A finalized object must be able to tolerate the clearing of + a subset of its referents. :pep:`442` calls this stage "cyclic trash". +#. **Leaked:** If a cyclic isolate still exists after all objects in the group + have been finalized and cleared, then the objects remain indefinitely + uncollectable (see :data:`gc.garbage`). It is a bug if a cyclic isolate + reaches this stage---it means the :c:member:`~PyTypeObject.tp_clear` methods + of the participating objects have failed to break the reference cycle as + required. + +If :c:member:`~PyTypeObject.tp_clear` did not exist, then Python would have no +way to safely break a reference cycle. Simply destroying an object in a cyclic +isolate would result in a dangling pointer, triggering undefined behavior when +an object referencing the destroyed object is itself destroyed. The clearing +step makes object destruction a two-phase process: first +:c:member:`~PyTypeObject.tp_clear` is called to partially destroy the objects +enough to detangle them from each other, then +:c:member:`~PyTypeObject.tp_dealloc` is called to complete the destruction. + +Unlike clearing, finalization is not a phase of destruction. A finalized +object must still behave properly by continuing to fulfill its design +contracts. An object's finalizer is allowed to execute arbitrary Python code, +and is even allowed to prevent the impending destruction by adding a reference. +The finalizer is only related to destruction by call order---if it runs, it runs +before destruction, which starts with :c:member:`~PyTypeObject.tp_clear` (if +called) and concludes with :c:member:`~PyTypeObject.tp_dealloc`. + +The finalization step is not necessary to safely reclaim the objects in a +cyclic isolate, but its existence makes it easier to design types that behave +in a sane manner when objects are cleared. Clearing an object might +necessarily leave it in a broken, partially destroyed state---it might be +unsafe to call any of the cleared object's methods or access any of its +attributes. With finalization, only finalized objects can possibly interact +with cleared objects; non-finalized objects are guaranteed to interact with +only non-cleared (but potentially finalized) objects. + +To summarize the possible interactions: + +* A non-finalized object might have references to or from non-finalized and + finalized objects, but not to or from cleared objects. +* A finalized object might have references to or from non-finalized, finalized, + and cleared objects. +* A cleared object might have references to or from finalized and cleared + objects, but not to or from non-finalized objects. + +Without any reference cycles, an object can be simply destroyed once its last +reference is deleted; the finalization and clearing steps are not necessary to +safely reclaim unused objects. However, it can be useful to automatically call +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` +before destruction anyway because type design is simplified when all objects +always experience the same series of events regardless of whether they +participated in a cyclic isolate. Python currently only calls +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` as +needed to destroy a cyclic isolate; this may change in a future version. + + +Functions +--------- + +To allocate and free memory, see :ref:`allocating-objects`. + + +.. c:function:: void PyObject_CallFinalizer(PyObject *op) + + Finalizes the object as described in :c:member:`~PyTypeObject.tp_finalize`. + Call this function (or :c:func:`PyObject_CallFinalizerFromDealloc`) instead + of calling :c:member:`~PyTypeObject.tp_finalize` directly because this + function may deduplicate multiple calls to :c:member:`!tp_finalize`. + Currently, calls are only deduplicated if the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set); this may + change in the future. + + +.. c:function:: int PyObject_CallFinalizerFromDealloc(PyObject *op) + + Same as :c:func:`PyObject_CallFinalizer` but meant to be called at the + beginning of the object's destructor (:c:member:`~PyTypeObject.tp_dealloc`). + There must not be any references to the object. If the object's finalizer + resurrects the object, this function returns -1; no further destruction + should happen. Otherwise, this function returns 0 and destruction can + continue normally. + + .. seealso:: + + :c:member:`~PyTypeObject.tp_dealloc` for example code. diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 64ae35daa703b8..61fa49f8681cce 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -376,6 +376,24 @@ The :ref:`default object allocator ` uses the If *p* is ``NULL``, no operation is performed. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_GC_New` or + :c:macro:`PyObject_GC_NewVar`; use :c:func:`PyObject_GC_Del` instead. + + .. seealso:: + + * :c:func:`PyObject_GC_Del` is the equivalent of this function for memory + allocated by types that support garbage collection. + * :c:func:`PyObject_Malloc` + * :c:func:`PyObject_Realloc` + * :c:func:`PyObject_Calloc` + * :c:macro:`PyObject_New` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. _default-memory-allocators: diff --git a/Doc/c-api/objimpl.rst b/Doc/c-api/objimpl.rst index 8bd8c107c98bdf..83de4248039949 100644 --- a/Doc/c-api/objimpl.rst +++ b/Doc/c-api/objimpl.rst @@ -12,6 +12,7 @@ object types. .. toctree:: allocation.rst + lifecycle.rst structures.rst typeobj.rst gcsupport.rst diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index ec2867b0ce09ba..2176b8e492f306 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -151,14 +151,29 @@ Type Objects .. c:function:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) - Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type object. Use - Python's default memory allocation mechanism to allocate a new instance and - initialize all its contents to ``NULL``. + Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type + object. Uses Python's default memory allocation mechanism to allocate memory + for a new instance, zeros the memory, then initializes the memory as if by + calling :c:func:`PyObject_Init` or :c:func:`PyObject_InitVar`. + + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + For types that support garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set), this function behaves like + :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar` (except the + memory is guaranteed to be zeroed before initialization), and should be + paired with :c:func:`PyObject_GC_Del` in :c:member:`~PyTypeObject.tp_free`. + Otherwise, it behaves like :c:macro:`PyObject_New` or + :c:macro:`PyObject_NewVar` (except the memory is guaranteed to be zeroed + before initialization) and should be paired with :c:func:`PyObject_Free` in + :c:member:`~PyTypeObject.tp_free`. .. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) - Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type object. Create a - new instance using the type's :c:member:`~PyTypeObject.tp_alloc` slot. + Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type + object. Creates a new instance using the type's + :c:member:`~PyTypeObject.tp_alloc` slot and returns the resulting object. .. c:function:: int PyType_Ready(PyTypeObject *type) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 3b9f07778d5ace..64353a8daca5f6 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -676,77 +676,122 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_dealloc - A pointer to the instance destructor function. This function must be defined - unless the type guarantees that its instances will never be deallocated (as is - the case for the singletons ``None`` and ``Ellipsis``). The function signature is:: + A pointer to the instance destructor function. The function signature is:: void tp_dealloc(PyObject *self); - The destructor function is called by the :c:func:`Py_DECREF` and - :c:func:`Py_XDECREF` macros when the new reference count is zero. At this point, - the instance is still in existence, but there are no references to it. The - destructor function should free all references which the instance owns, free all - memory buffers owned by the instance (using the freeing function corresponding - to the allocation function used to allocate the buffer), and call the type's - :c:member:`~PyTypeObject.tp_free` function. If the type is not subtypable - (doesn't have the :c:macro:`Py_TPFLAGS_BASETYPE` flag bit set), it is - permissible to call the object deallocator directly instead of via - :c:member:`~PyTypeObject.tp_free`. The object deallocator should be the one used to allocate the - instance; this is normally :c:func:`PyObject_Free` if the instance was allocated - using :c:macro:`PyObject_New` or :c:macro:`PyObject_NewVar`, or - :c:func:`PyObject_GC_Del` if the instance was allocated using - :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. - - If the type supports garbage collection (has the :c:macro:`Py_TPFLAGS_HAVE_GC` - flag bit set), the destructor should call :c:func:`PyObject_GC_UnTrack` + The destructor function should remove all references which the instance owns + (e.g., call :c:func:`Py_CLEAR`), free all memory buffers owned by the + instance, and call the type's :c:member:`~PyTypeObject.tp_free` function to + free the object itself. + + No guarantees are made about when an object is destroyed, except: + + * Python will destroy an object immediately or some time after the final + reference to the object is deleted, unless its finalizer + (:c:member:`~PyTypeObject.tp_finalize`) subsequently resurrects the + object. + * An object will not be destroyed while it is being automatically finalized + (:c:member:`~PyTypeObject.tp_finalize`) or automatically cleared + (:c:member:`~PyTypeObject.tp_clear`). + + CPython currently destroys an object immediately from :c:func:`Py_DECREF` + when the new reference count is zero, but this may change in a future + version. + + It is recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is always + finalized before destruction. + + If the type supports garbage collection (the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set), the destructor should call :c:func:`PyObject_GC_UnTrack` before clearing any member fields. - .. code-block:: c + It is permissible to call :c:member:`~PyTypeObject.tp_clear` from + :c:member:`!tp_dealloc` to reduce code duplication and to guarantee that the + object is always cleared before destruction. Beware that + :c:member:`!tp_clear` might have already been called. - static void - foo_dealloc(PyObject *op) - { + If the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the + deallocator should release the owned reference to its type object (via + :c:func:`Py_DECREF`) after calling the type deallocator. See the example + code below.:: + + static void + foo_dealloc(PyObject *op) + { foo_object *self = (foo_object *) op; PyObject_GC_UnTrack(self); Py_CLEAR(self->ref); Py_TYPE(self)->tp_free(self); - } + } + + :c:member:`!tp_dealloc` must leave the exception status unchanged. If it + needs to call something that might raise an exception, the exception state + must be backed up first and restored later (after logging any exceptions + with :c:func:`PyErr_WriteUnraisable`). + + Example:: - Finally, if the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the - deallocator should release the owned reference to its type object - (via :c:func:`Py_DECREF`) after - calling the type deallocator. In order to avoid dangling pointers, the - recommended way to achieve this is: + static void + foo_dealloc(PyObject *self) + { + PyObject *exc = PyErr_GetRaisedException(); - .. code-block:: c + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // self was resurrected. + goto done; + } - static void - foo_dealloc(PyObject *op) - { - PyTypeObject *tp = Py_TYPE(op); - // free references and buffers here - tp->tp_free(op); - Py_DECREF(tp); - } + PyTypeObject *tp = Py_TYPE(self); - .. warning:: + if (tp->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_UnTrack(self); + } - In a garbage collected Python, :c:member:`!tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the - object becomes part of a refcount cycle, that cycle might be collected by - a garbage collection on any thread). This is not a problem for Python - API calls, since the thread on which :c:member:`!tp_dealloc` is called - with an :term:`attached thread state`. However, if the object being - destroyed in turn destroys objects from some other C or C++ library, care - should be taken to ensure that destroying those objects on the thread - which called :c:member:`!tp_dealloc` will not violate any assumptions of - the library. + // Optional, but convenient to avoid code duplication. + if (tp->tp_clear && tp->tp_clear(self) < 0) { + PyErr_WriteUnraisable(self); + } + + // Any additional destruction goes here. + + tp->tp_free(self); + self = NULL; // In case PyErr_WriteUnraisable() is called below. + + if (tp->tp_flags & Py_TPFLAGS_HEAPTYPE) { + Py_CLEAR(tp); + } + + done: + // Optional, if something was called that might have raised an + // exception. + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(self); + } + PyErr_SetRaisedException(exc); + } + + :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + with an :term:`attached thread state`. However, if the object being + destroyed in turn destroys objects from some other C library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. **Inheritance:** This field is inherited by subtypes. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: Py_ssize_t PyTypeObject.tp_vectorcall_offset @@ -1137,11 +1182,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_HAVE_GC This bit is set when the object supports garbage collection. If this bit - is set, instances must be created using :c:macro:`PyObject_GC_New` and - destroyed using :c:func:`PyObject_GC_Del`. More information in section - :ref:`supporting-cycle-detection`. This bit also implies that the - GC-related fields :c:member:`~PyTypeObject.tp_traverse` and :c:member:`~PyTypeObject.tp_clear` are present in - the type object. + is set, memory for new instances (see :c:member:`~PyTypeObject.tp_alloc`) + must be allocated using :c:macro:`PyObject_GC_New` or + :c:func:`PyType_GenericAlloc` and deallocated (see + :c:member:`~PyTypeObject.tp_free`) using :c:func:`PyObject_GC_Del`. More + information in section :ref:`supporting-cycle-detection`. **Inheritance:** @@ -1478,6 +1523,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) heap-allocated superclass). If they do not, the type object may not be garbage-collected. + .. note:: + + The :c:member:`~PyTypeObject.tp_traverse` function can be called from any + thread. + .. versionchanged:: 3.9 Heap-allocated types are expected to visit ``Py_TYPE(self)`` in @@ -1497,20 +1547,101 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_clear - An optional pointer to a clear function for the garbage collector. This is only - used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: + An optional pointer to a clear function. The signature is:: int tp_clear(PyObject *); - The :c:member:`~PyTypeObject.tp_clear` member function is used to break reference cycles in cyclic - garbage detected by the garbage collector. Taken together, all :c:member:`~PyTypeObject.tp_clear` - functions in the system must combine to break all reference cycles. This is - subtle, and if in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For example, - the tuple type does not implement a :c:member:`~PyTypeObject.tp_clear` function, because it's - possible to prove that no reference cycle can be composed entirely of tuples. - Therefore the :c:member:`~PyTypeObject.tp_clear` functions of other types must be sufficient to - break any cycle containing a tuple. This isn't immediately obvious, and there's - rarely a good reason to avoid implementing :c:member:`~PyTypeObject.tp_clear`. + The purpose of this function is to break reference cycles that are causing a + :term:`cyclic isolate` so that the objects can be safely destroyed. A + cleared object is a partially destroyed object; the object is not obligated + to satisfy design invariants held during normal use. + + :c:member:`!tp_clear` does not need to delete references to objects that + can't participate in reference cycles, such as Python strings or Python + integers. However, it may be convenient to clear all references, and write + the type's :c:member:`~PyTypeObject.tp_dealloc` function to invoke + :c:member:`!tp_clear` to avoid code duplication. (Beware that + :c:member:`!tp_clear` might have already been called. Prefer calling + idempotent functions like :c:func:`Py_CLEAR`.) + + Any non-trivial cleanup should be performed in + :c:member:`~PyTypeObject.tp_finalize` instead of :c:member:`!tp_clear`. + + .. note:: + + If :c:member:`!tp_clear` fails to break a reference cycle then the + objects in the :term:`cyclic isolate` may remain indefinitely + uncollectable ("leak"). See :data:`gc.garbage`. + + .. note:: + + Referents (direct and indirect) might have already been cleared; they are + not guaranteed to be in a consistent state. + + .. note:: + + The :c:member:`~PyTypeObject.tp_clear` function can be called from any + thread. + + .. note:: + + An object is not guaranteed to be automatically cleared before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. + + This function differs from the destructor + (:c:member:`~PyTypeObject.tp_dealloc`) in the following ways: + + * The purpose of clearing an object is to remove references to other objects + that might participate in a reference cycle. The purpose of the + destructor, on the other hand, is a superset: it must release *all* + resources it owns, including references to objects that cannot participate + in a reference cycle (e.g., integers) as well as the object's own memory + (by calling :c:member:`~PyTypeObject.tp_free`). + * When :c:member:`!tp_clear` is called, other objects might still hold + references to the object being cleared. Because of this, + :c:member:`!tp_clear` must not deallocate the object's own memory + (:c:member:`~PyTypeObject.tp_free`). The destructor, on the other hand, + is only called when no (strong) references exist, and as such, must + safely destroy the object itself by deallocating it. + * :c:member:`!tp_clear` might never be automatically called. An object's + destructor, on the other hand, will be automatically called some time + after the object becomes unreachable (i.e., either there are no references + to the object or the object is a member of a :term:`cyclic isolate`). + + No guarantees are made about when, if, or how often Python automatically + clears an object, except: + + * Python will not automatically clear an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically clear an object if it has not been + automatically finalized (see :c:member:`~PyTypeObject.tp_finalize`). (If + the finalizer resurrected the object, the object may or may not be + automatically finalized again before it is cleared.) + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically clear it if any member of the cyclic isolate has not yet + been automatically finalized (:c:member:`~PyTypeObject.tp_finalize`). + * Python will not destroy an object until after any automatic calls to its + :c:member:`!tp_clear` function have returned. This ensures that the act + of breaking a reference cycle does not invalidate the ``self`` pointer + while :c:member:`!tp_clear` is still executing. + * Python will not automatically call :c:member:`!tp_clear` multiple times + concurrently. + + CPython currently only automatically clears objects as needed to break + reference cycles in a :term:`cyclic isolate`, but future versions might + clear objects regularly before their destruction. + + Taken together, all :c:member:`~PyTypeObject.tp_clear` functions in the + system must combine to break all reference cycles. This is subtle, and if + in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For + example, the tuple type does not implement a + :c:member:`~PyTypeObject.tp_clear` function, because it's possible to prove + that no reference cycle can be composed entirely of tuples. Therefore the + :c:member:`~PyTypeObject.tp_clear` functions of other types are responsible + for breaking any cycle containing a tuple. This isn't immediately obvious, + and there's rarely a good reason to avoid implementing + :c:member:`~PyTypeObject.tp_clear`. Implementations of :c:member:`~PyTypeObject.tp_clear` should drop the instance's references to those of its members that may be Python objects, and set its pointers to those @@ -1545,18 +1676,6 @@ and :c:data:`PyType_Type` effectively act as defaults.) PyObject_ClearManagedDict((PyObject*)self); - Note that :c:member:`~PyTypeObject.tp_clear` is not *always* called - before an instance is deallocated. For example, when reference counting - is enough to determine that an object is no longer used, the cyclic garbage - collector is not involved and :c:member:`~PyTypeObject.tp_dealloc` is - called directly. - - Because the goal of :c:member:`~PyTypeObject.tp_clear` functions is to break reference cycles, - it's not necessary to clear contained objects like Python strings or Python - integers, which can't participate in reference cycles. On the other hand, it may - be convenient to clear all contained Python objects, and write the type's - :c:member:`~PyTypeObject.tp_dealloc` function to invoke :c:member:`~PyTypeObject.tp_clear`. - More information about Python's garbage collection scheme can be found in section :ref:`supporting-cycle-detection`. @@ -1569,6 +1688,10 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:member:`~PyTypeObject.tp_clear` are all inherited from the base type if they are all zero in the subtype. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: richcmpfunc PyTypeObject.tp_richcompare @@ -1945,18 +2068,17 @@ and :c:data:`PyType_Type` effectively act as defaults.) **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement). + Static subtypes inherit this slot, which will be + :c:func:`PyType_GenericAlloc` if inherited from :class:`object`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - For dynamic subtypes, this field is always set to - :c:func:`PyType_GenericAlloc`, to force a standard heap - allocation strategy. + For heap subtypes, this field is always set to + :c:func:`PyType_GenericAlloc`. - For static subtypes, :c:data:`PyBaseObject_Type` uses - :c:func:`PyType_GenericAlloc`. That is the recommended value - for all statically defined types. + For static subtypes, this slot is inherited (see above). .. c:member:: newfunc PyTypeObject.tp_new @@ -2004,20 +2126,27 @@ and :c:data:`PyType_Type` effectively act as defaults.) void tp_free(void *self); - An initializer that is compatible with this signature is :c:func:`PyObject_Free`. + This function must free the memory allocated by + :c:member:`~PyTypeObject.tp_alloc`. **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement) + Static subtypes inherit this slot, which will be :c:func:`PyObject_Free` if + inherited from :class:`object`. Exception: If the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and it would inherit + :c:func:`PyObject_Free`, then this slot is not inherited but instead defaults + to :c:func:`PyObject_GC_Del`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - In dynamic subtypes, this field is set to a deallocator suitable to - match :c:func:`PyType_GenericAlloc` and the value of the - :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit. + For :ref:`heap subtypes `, this slot defaults to a deallocator suitable to match + :c:func:`PyType_GenericAlloc` and the value of the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag. - For static subtypes, :c:data:`PyBaseObject_Type` uses :c:func:`PyObject_Free`. + For static subtypes, this slot is inherited (see above). .. c:member:: inquiry PyTypeObject.tp_is_gc @@ -2144,29 +2273,138 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_finalize - An optional pointer to an instance finalization function. Its signature is:: + An optional pointer to an instance finalization function. This is the C + implementation of the :meth:`~object.__del__` special method. Its signature + is:: void tp_finalize(PyObject *self); - If :c:member:`~PyTypeObject.tp_finalize` is set, the interpreter calls it once when - finalizing an instance. It is called either from the garbage - collector (if the instance is part of an isolated reference cycle) or - just before the object is deallocated. Either way, it is guaranteed - to be called before attempting to break reference cycles, ensuring - that it finds the object in a sane state. + The primary purpose of finalization is to perform any non-trivial cleanup + that must be performed before the object is destroyed, while the object and + any other objects it directly or indirectly references are still in a + consistent state. The finalizer is allowed to execute + arbitrary Python code. + + Before Python automatically finalizes an object, some of the object's direct + or indirect referents might have themselves been automatically finalized. + However, none of the referents will have been automatically cleared + (:c:member:`~PyTypeObject.tp_clear`) yet. + + Other non-finalized objects might still be using a finalized object, so the + finalizer must leave the object in a sane state (e.g., invariants are still + met). + + .. note:: + + After Python automatically finalizes an object, Python might start + automatically clearing (:c:member:`~PyTypeObject.tp_clear`) the object + and its referents (direct and indirect). Cleared objects are not + guaranteed to be in a consistent state; a finalized object must be able + to tolerate cleared referents. + + .. note:: + + An object is not guaranteed to be automatically finalized before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. It is + recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is + always finalized before destruction. + + .. note:: + + The :c:member:`~PyTypeObject.tp_finalize` function can be called from any + thread, although the :term:`GIL` will be held. - :c:member:`~PyTypeObject.tp_finalize` should not mutate the current exception status; - therefore, a recommended way to write a non-trivial finalizer is:: + .. note:: + + The :c:member:`!tp_finalize` function can be called during shutdown, + after some global variables have been deleted. See the documentation of + the :meth:`~object.__del__` method for details. + + When Python finalizes an object, it behaves like the following algorithm: + + #. Python might mark the object as *finalized*. Currently, Python always + marks objects whose type supports garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and never marks other types of + objects; this might change in a future version. + #. If the object is not marked as *finalized* and its + :c:member:`!tp_finalize` finalizer function is non-``NULL``, the + finalizer function is called. + #. If the finalizer function was called and the finalizer made the object + reachable (i.e., there is a reference to the object and it is not a + member of a :term:`cyclic isolate`), then the finalizer is said to have + *resurrected* the object. It is unspecified whether the finalizer can + also resurrect the object by adding a new reference to the object that + does not make it reachable, i.e., the object is (still) a member of a + cyclic isolate. + #. If the finalizer resurrected the object, the object's pending destruction + is canceled and the object's *finalized* mark might be removed if + present. Currently, Python never removes the *finalized* mark; this + might change in a future version. + + *Automatic finalization* refers to any finalization performed by Python + except via calls to :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. No guarantees are made about + when, if, or how often an object is automatically finalized, except: + + * Python will not automatically finalize an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically finalize an object if finalizing it would + not mark the object as *finalized*. Currently, this applies to objects + whose type does not support garbage collection, i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is not set. Such objects can still be + manually finalized by calling :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. + * Python will not automatically finalize any two members of a :term:`cyclic + isolate` concurrently. + * Python will not automatically finalize an object after it has + automatically cleared (:c:member:`~PyTypeObject.tp_clear`) the object. + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically finalize it after automatically clearing (see + :c:member:`~PyTypeObject.tp_clear`) any other member. + * Python will automatically finalize every member of a :term:`cyclic + isolate` before it automatically clears (see + :c:member:`~PyTypeObject.tp_clear`) any of them. + * If Python is going to automatically clear an object + (:c:member:`~PyTypeObject.tp_clear`), it will automatically finalize the + object first. + + Python currently only automatically finalizes objects that are members of a + :term:`cyclic isolate`, but future versions might finalize objects regularly + before their destruction. + + To manually finalize an object, do not call this function directly; call + :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc` instead. + + :c:member:`~PyTypeObject.tp_finalize` should leave the current exception + status unchanged. The recommended way to write a non-trivial finalizer is + to back up the exception at the beginning by calling + :c:func:`PyErr_GetRaisedException` and restore the exception at the end by + calling :c:func:`PyErr_SetRaisedException`. If an exception is encountered + in the middle of the finalizer, log and clear it with + :c:func:`PyErr_WriteUnraisable` or :c:func:`PyErr_FormatUnraisable`. For + example:: static void - local_finalize(PyObject *self) + foo_finalize(PyObject *self) { - /* Save the current exception, if any. */ + // Save the current exception, if any. PyObject *exc = PyErr_GetRaisedException(); - /* ... */ + // ... - /* Restore the saved exception. */ + if (do_something_that_might_raise() != success_indicator) { + PyErr_WriteUnraisable(self); + goto done; + } + + done: + // Restore the saved exception. This silently discards any exception + // raised above, so be sure to call PyErr_WriteUnraisable first if + // necessary. PyErr_SetRaisedException(exc); } @@ -2182,7 +2420,13 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:macro:`Py_TPFLAGS_HAVE_FINALIZE` flags bit in order for this field to be used. This is no longer required. - .. seealso:: "Safe object finalization" (:pep:`442`) + .. seealso:: + + * :pep:`442`: "Safe object finalization" + * :ref:`life-cycle` for details about how this slot relates to other + slots. + * :c:func:`PyObject_CallFinalizer` + * :c:func:`PyObject_CallFinalizerFromDealloc` .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 0b26e18efd7f1b..c5c7994f1262a9 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -355,6 +355,12 @@ Glossary tasks (see :mod:`asyncio`) associate each task with a context which becomes the current context whenever the task starts or resumes execution. + cyclic isolate + A subgroup of one or more objects that reference each other in a reference + cycle, but are not referenced by objects outside the group. The goal of + the :term:`cyclic garbage collector ` is to identify these groups and break the reference + cycles so that the memory can be reclaimed. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for From 5ab66a882d1b5e44ec50b25df116ab209d65863f Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Tue, 20 May 2025 09:14:02 -0400 Subject: [PATCH 231/989] gh-62824: Add alias for iso-8859-8-i which is the same as iso-8859-8 (gh-134306) Co-authored-by: David Goncalves Co-authored-by: Oleg Iarygin --- Lib/encodings/aliases.py | 2 ++ Lib/test/test_codecs.py | 8 ++++++++ .../Library/2023-02-13-21-56-38.gh-issue-62824.CBZzX3.rst | 1 + 3 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-02-13-21-56-38.gh-issue-62824.CBZzX3.rst diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index a94bb270671e37..4ecb6b6e297a13 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -405,6 +405,8 @@ 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', + 'iso_8859_8_i' : 'iso8859_8', + 'iso_8859_8_e' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d42270da15ee32..1d6136405f428c 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3,6 +3,7 @@ import copy import io import pickle +import os import sys import unittest import encodings @@ -3107,6 +3108,13 @@ def test_aliases(self): info = codecs.lookup(alias) self.assertEqual(info.name, expected_name) + def test_alias_modules_exist(self): + encodings_dir = os.path.dirname(encodings.__file__) + for value in encodings.aliases.aliases.values(): + codec_file = os.path.join(encodings_dir, value + ".py") + self.assertTrue(os.path.isfile(codec_file), + "Codec file not found: " + codec_file) + def test_quopri_stateless(self): # Should encode with quotetabs=True encoded = codecs.encode(b"space tab\teol \n", "quopri-codec") diff --git a/Misc/NEWS.d/next/Library/2023-02-13-21-56-38.gh-issue-62824.CBZzX3.rst b/Misc/NEWS.d/next/Library/2023-02-13-21-56-38.gh-issue-62824.CBZzX3.rst new file mode 100644 index 00000000000000..1fe4e47c9ec213 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-13-21-56-38.gh-issue-62824.CBZzX3.rst @@ -0,0 +1 @@ +Fix aliases for ``iso8859_8`` encoding. Patch by Dave Goncalves. From f3acbb72ff125172522545ec6f6c676f5108b109 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Tue, 20 May 2025 23:46:13 +0900 Subject: [PATCH 232/989] gh-101100: Fix Sphinx warnings in `library/decimal.rst` (#134303) --- Doc/conf.py | 1 - Doc/library/decimal.rst | 72 ++++++++++++++++++++++++----------------- Doc/tools/.nitignore | 1 - 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 467961dd5e2bff..7fadad66cb3238 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -308,7 +308,6 @@ ('py:attr', '__annotations__'), ('py:meth', '__missing__'), ('py:attr', '__wrapped__'), - ('py:attr', 'decimal.Context.clamp'), ('py:meth', 'index'), # list.index, tuple.index, etc. ] diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index f53c1f3467034e..10ddfa02b43156 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -2,7 +2,7 @@ ===================================================================== .. module:: decimal - :synopsis: Implementation of the General Decimal Arithmetic Specification. + :synopsis: Implementation of the General Decimal Arithmetic Specification. .. moduleauthor:: Eric Price .. moduleauthor:: Facundo Batista @@ -121,7 +121,7 @@ reset them before monitoring a calculation. .. _decimal-tutorial: -Quick-start Tutorial +Quick-start tutorial -------------------- The usual start to using decimals is importing the module, viewing the current @@ -1096,40 +1096,52 @@ In addition to the three supplied contexts, new contexts can be created with the default values are copied from the :const:`DefaultContext`. If the *flags* field is not specified or is :const:`None`, all flags are cleared. - *prec* is an integer in the range [``1``, :const:`MAX_PREC`] that sets - the precision for arithmetic operations in the context. + .. attribute:: prec - The *rounding* option is one of the constants listed in the section - `Rounding Modes`_. + An integer in the range [``1``, :const:`MAX_PREC`] that sets + the precision for arithmetic operations in the context. - The *traps* and *flags* fields list any signals to be set. Generally, new - contexts should only set traps and leave the flags clear. + .. attribute:: rounding - The *Emin* and *Emax* fields are integers specifying the outer limits allowable - for exponents. *Emin* must be in the range [:const:`MIN_EMIN`, ``0``], - *Emax* in the range [``0``, :const:`MAX_EMAX`]. + One of the constants listed in the section `Rounding Modes`_. - The *capitals* field is either ``0`` or ``1`` (the default). If set to - ``1``, exponents are printed with a capital ``E``; otherwise, a - lowercase ``e`` is used: ``Decimal('6.02e+23')``. + .. attribute:: traps + flags - The *clamp* field is either ``0`` (the default) or ``1``. - If set to ``1``, the exponent ``e`` of a :class:`Decimal` - instance representable in this context is strictly limited to the - range ``Emin - prec + 1 <= e <= Emax - prec + 1``. If *clamp* is - ``0`` then a weaker condition holds: the adjusted exponent of - the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is - ``1``, a large normal number will, where possible, have its - exponent reduced and a corresponding number of zeros added to its - coefficient, in order to fit the exponent constraints; this - preserves the value of the number but loses information about - significant trailing zeros. For example:: + Lists of any signals to be set. Generally, new contexts should only set + traps and leave the flags clear. - >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') - Decimal('1.23000E+999') + .. attribute:: Emin + Emax - A *clamp* value of ``1`` allows compatibility with the - fixed-width decimal interchange formats specified in IEEE 754. + Integers specifying the outer limits allowable for exponents. *Emin* must + be in the range [:const:`MIN_EMIN`, ``0``], *Emax* in the range + [``0``, :const:`MAX_EMAX`]. + + .. attribute:: capitals + + Either ``0`` or ``1`` (the default). If set to + ``1``, exponents are printed with a capital ``E``; otherwise, a + lowercase ``e`` is used: ``Decimal('6.02e+23')``. + + .. attribute:: clamp + + Either ``0`` (the default) or ``1``. If set to ``1``, the exponent ``e`` + of a :class:`Decimal` instance representable in this context is strictly + limited to the range ``Emin - prec + 1 <= e <= Emax - prec + 1``. + If *clamp* is ``0`` then a weaker condition holds: the adjusted exponent of + the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is + ``1``, a large normal number will, where possible, have its + exponent reduced and a corresponding number of zeros added to its + coefficient, in order to fit the exponent constraints; this + preserves the value of the number but loses information about + significant trailing zeros. For example:: + + >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') + Decimal('1.23000E+999') + + A *clamp* value of ``1`` allows compatibility with the + fixed-width decimal interchange formats specified in IEEE 754. The :class:`Context` class defines several general purpose methods as well as a large number of methods for doing arithmetic directly in a given context. @@ -1769,7 +1781,7 @@ The following table summarizes the hierarchy of signals:: .. _decimal-notes: -Floating-Point Notes +Floating-point notes -------------------- diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 926d2cd42bdf02..e3bcb968128d96 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -14,7 +14,6 @@ Doc/c-api/typeobj.rst Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst -Doc/library/decimal.rst Doc/library/email.charset.rst Doc/library/email.compat32-message.rst Doc/library/email.parser.rst From f695eca60cfc53cf3322323082652037d6d0cfef Mon Sep 17 00:00:00 2001 From: Christian Harries <68507104+ChristianHrs@users.noreply.github.com> Date: Tue, 20 May 2025 16:14:27 +0100 Subject: [PATCH 233/989] gh-86802: Fix asyncio memory leak; shielded task exceptions log once through the exception handler (gh-134331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/asyncio/tasks.py | 36 +++++++++++++---- Lib/test/test_asyncio/test_tasks.py | 40 +++++++++++++++++++ ...5-05-20-15-13-43.gh-issue-86802.trF7TM.rst | 3 ++ 3 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-20-15-13-43.gh-issue-86802.trF7TM.rst diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 888615f8e5e1b3..fbd5c39a7c56ac 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -908,6 +908,25 @@ def _done_callback(fut, cur_task=cur_task): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -953,14 +972,11 @@ def shield(arg): else: cur_task = None - def _inner_done_callback(inner, cur_task=cur_task): - if cur_task is not None: - futures.future_discard_from_awaited_by(inner, cur_task) + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -972,10 +988,16 @@ def _inner_done_callback(inner, cur_task=cur_task): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) diff --git a/Lib/test/test_asyncio/test_tasks.py b/Lib/test/test_asyncio/test_tasks.py index 44498ef790e450..f6f976f213ac02 100644 --- a/Lib/test/test_asyncio/test_tasks.py +++ b/Lib/test/test_asyncio/test_tasks.py @@ -2116,6 +2116,46 @@ def test_shield_cancel_outer(self): self.assertTrue(outer.cancelled()) self.assertEqual(0, 0 if outer._callbacks is None else len(outer._callbacks)) + def test_shield_cancel_outer_result(self): + mock_handler = mock.Mock() + self.loop.set_exception_handler(mock_handler) + inner = self.new_future(self.loop) + outer = asyncio.shield(inner) + test_utils.run_briefly(self.loop) + outer.cancel() + test_utils.run_briefly(self.loop) + inner.set_result(1) + test_utils.run_briefly(self.loop) + mock_handler.assert_not_called() + + def test_shield_cancel_outer_exception(self): + mock_handler = mock.Mock() + self.loop.set_exception_handler(mock_handler) + inner = self.new_future(self.loop) + outer = asyncio.shield(inner) + test_utils.run_briefly(self.loop) + outer.cancel() + test_utils.run_briefly(self.loop) + inner.set_exception(Exception('foo')) + test_utils.run_briefly(self.loop) + mock_handler.assert_called_once() + + def test_shield_duplicate_log_once(self): + mock_handler = mock.Mock() + self.loop.set_exception_handler(mock_handler) + inner = self.new_future(self.loop) + outer = asyncio.shield(inner) + test_utils.run_briefly(self.loop) + outer.cancel() + test_utils.run_briefly(self.loop) + outer = asyncio.shield(inner) + test_utils.run_briefly(self.loop) + outer.cancel() + test_utils.run_briefly(self.loop) + inner.set_exception(Exception('foo')) + test_utils.run_briefly(self.loop) + mock_handler.assert_called_once() + def test_shield_shortcut(self): fut = self.new_future(self.loop) fut.set_result(42) diff --git a/Misc/NEWS.d/next/Library/2025-05-20-15-13-43.gh-issue-86802.trF7TM.rst b/Misc/NEWS.d/next/Library/2025-05-20-15-13-43.gh-issue-86802.trF7TM.rst new file mode 100644 index 00000000000000..d3117b16f04436 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-20-15-13-43.gh-issue-86802.trF7TM.rst @@ -0,0 +1,3 @@ +Fixed asyncio memory leak in cancelled shield tasks. For shielded tasks +where the shield was cancelled, log potential exceptions through the +exception handler. Contributed by Christian Harries. From 6dcb0fdfe0a2de083f0f1f9a568dd0a19541b863 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 20 May 2025 11:24:11 -0400 Subject: [PATCH 234/989] GH-134282: Always borrow references LOAD_CONST (GH-134284) --- Include/internal/pycore_magic_number.h | 3 +- Include/internal/pycore_opcode_metadata.h | 21 ++----- Include/internal/pycore_stackref.h | 13 ++-- Include/internal/pycore_uop_ids.h | 2 - Include/internal/pycore_uop_metadata.h | 10 +-- Include/opcode_ids.h | 42 ++++++------- Lib/_opcode_metadata.py | 46 ++++++-------- Lib/test/test_capi/test_object.py | 2 +- Lib/test/test_dis.py | 6 +- Lib/test/test_opcache.py | 14 ----- Python/bytecodes.c | 63 +++---------------- Python/executor_cases.c.h | 74 ++++------------------ Python/generated_cases.c.h | 76 +---------------------- Python/opcode_targets.h | 10 ++- Python/optimizer_bytecodes.c | 14 ----- Python/optimizer_cases.c.h | 16 +---- Tools/cases_generator/analyzer.py | 2 +- 17 files changed, 87 insertions(+), 327 deletions(-) diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 22375a3b16b534..3fd56c346b9446 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -278,6 +278,7 @@ Known values: Python 3.14a7 3623 (Add BUILD_INTERPOLATION & BUILD_TEMPLATE opcodes) Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST) Python 3.15a0 3650 (Initial version) + Python 3.15a1 3651 (Simplify LOAD_CONST) Python 3.16 will start with 3700 @@ -290,7 +291,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3650 +#define PYC_MAGIC_NUMBER 3651 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 33e9804e59379b..b7787653d257cc 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -334,10 +334,6 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_CONST: return 0; - case LOAD_CONST_IMMORTAL: - return 0; - case LOAD_CONST_MORTAL: - return 0; case LOAD_DEREF: return 0; case LOAD_FAST: @@ -821,10 +817,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_CONST: return 1; - case LOAD_CONST_IMMORTAL: - return 1; - case LOAD_CONST_MORTAL: - return 1; case LOAD_DEREF: return 1; case LOAD_FAST: @@ -1221,8 +1213,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, - [LOAD_CONST_IMMORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, - [LOAD_CONST_MORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1435,8 +1425,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_ATTR_WITH_HINT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_WITH_HINT, 1, 3 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, [LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { _LOAD_BUILD_CLASS, OPARG_SIMPLE, 0 } } }, [LOAD_COMMON_CONSTANT] = { .nuops = 1, .uops = { { _LOAD_COMMON_CONSTANT, OPARG_SIMPLE, 0 } } }, - [LOAD_CONST_IMMORTAL] = { .nuops = 1, .uops = { { _LOAD_CONST_IMMORTAL, OPARG_SIMPLE, 0 } } }, - [LOAD_CONST_MORTAL] = { .nuops = 1, .uops = { { _LOAD_CONST_MORTAL, OPARG_SIMPLE, 0 } } }, + [LOAD_CONST] = { .nuops = 1, .uops = { { _LOAD_CONST, OPARG_SIMPLE, 0 } } }, [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, @@ -1667,8 +1656,6 @@ const char *_PyOpcode_OpName[267] = { [LOAD_CLOSURE] = "LOAD_CLOSURE", [LOAD_COMMON_CONSTANT] = "LOAD_COMMON_CONSTANT", [LOAD_CONST] = "LOAD_CONST", - [LOAD_CONST_IMMORTAL] = "LOAD_CONST_IMMORTAL", - [LOAD_CONST_MORTAL] = "LOAD_CONST_MORTAL", [LOAD_DEREF] = "LOAD_DEREF", [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", @@ -1794,6 +1781,8 @@ const uint8_t _PyOpcode_Deopt[256] = { [125] = 125, [126] = 126, [127] = 127, + [210] = 210, + [211] = 211, [212] = 212, [213] = 213, [214] = 214, @@ -1959,8 +1948,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_BUILD_CLASS] = LOAD_BUILD_CLASS, [LOAD_COMMON_CONSTANT] = LOAD_COMMON_CONSTANT, [LOAD_CONST] = LOAD_CONST, - [LOAD_CONST_IMMORTAL] = LOAD_CONST, - [LOAD_CONST_MORTAL] = LOAD_CONST, [LOAD_DEREF] = LOAD_DEREF, [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, @@ -2055,6 +2042,8 @@ const uint8_t _PyOpcode_Deopt[256] = { case 125: \ case 126: \ case 127: \ + case 210: \ + case 211: \ case 212: \ case 213: \ case 214: \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index dc5e56102fa406..3ead6bc63c1d12 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -134,12 +134,11 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj, const char *filename, int linenumbe #define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj), __FILE__, __LINE__) static inline _PyStackRef -_PyStackRef_FromPyObjectImmortal(PyObject *obj, const char *filename, int linenumber) +_PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumber) { - assert(_Py_IsImmortal(obj)); return _Py_stackref_create(obj, filename, linenumber); } -#define PyStackRef_FromPyObjectImmortal(obj) _PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj), __FILE__, __LINE__) +#define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__) static inline void _PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber) @@ -366,15 +365,14 @@ PyStackRef_FromPyObjectNew(PyObject *obj) #define PyStackRef_FromPyObjectNew(obj) PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) static inline _PyStackRef -PyStackRef_FromPyObjectImmortal(PyObject *obj) +PyStackRef_FromPyObjectBorrow(PyObject *obj) { // Make sure we don't take an already tagged value. assert(((uintptr_t)obj & Py_TAG_BITS) == 0); assert(obj != NULL); - assert(_Py_IsImmortal(obj)); return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED }; } -#define PyStackRef_FromPyObjectImmortal(obj) PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj)) +#define PyStackRef_FromPyObjectBorrow(obj) PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj)) #define PyStackRef_CLOSE(REF) \ do { \ @@ -582,9 +580,8 @@ _PyStackRef_FromPyObjectNewMortal(PyObject *obj) /* Create a new reference from an object with an embedded reference count */ static inline _PyStackRef -PyStackRef_FromPyObjectImmortal(PyObject *obj) +PyStackRef_FromPyObjectBorrow(PyObject *obj) { - assert(_Py_IsImmortal(obj)); return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; } diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 322f3cc093f7bb..6014e1bf3c0c38 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -207,10 +207,8 @@ extern "C" { #define _LOAD_BYTECODE 445 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL #define _LOAD_CONST_INLINE 446 #define _LOAD_CONST_INLINE_BORROW 447 -#define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL #define _LOAD_DEREF LOAD_DEREF #define _LOAD_FAST 448 #define _LOAD_FAST_0 449 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index b334dd840c8fac..002591318b3276 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -45,8 +45,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, - [_LOAD_CONST_IMMORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_SMALL_INT_0] = 0, [_LOAD_SMALL_INT_1] = 0, [_LOAD_SMALL_INT_2] = 0, @@ -501,10 +500,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_COMMON_CONSTANT] = "_LOAD_COMMON_CONSTANT", - [_LOAD_CONST_IMMORTAL] = "_LOAD_CONST_IMMORTAL", + [_LOAD_CONST] = "_LOAD_CONST", [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", - [_LOAD_CONST_MORTAL] = "_LOAD_CONST_MORTAL", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", [_LOAD_FAST_0] = "_LOAD_FAST_0", @@ -674,9 +672,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST_BORROW_LOAD_FAST_BORROW: return 0; - case _LOAD_CONST_MORTAL: - return 0; - case _LOAD_CONST_IMMORTAL: + case _LOAD_CONST: return 0; case _LOAD_SMALL_INT_0: return 0; diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index 209103c83b3eb7..1d5c74adefcd35 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -193,28 +193,26 @@ extern "C" { #define LOAD_ATTR_PROPERTY 187 #define LOAD_ATTR_SLOT 188 #define LOAD_ATTR_WITH_HINT 189 -#define LOAD_CONST_IMMORTAL 190 -#define LOAD_CONST_MORTAL 191 -#define LOAD_GLOBAL_BUILTIN 192 -#define LOAD_GLOBAL_MODULE 193 -#define LOAD_SUPER_ATTR_ATTR 194 -#define LOAD_SUPER_ATTR_METHOD 195 -#define RESUME_CHECK 196 -#define SEND_GEN 197 -#define STORE_ATTR_INSTANCE_VALUE 198 -#define STORE_ATTR_SLOT 199 -#define STORE_ATTR_WITH_HINT 200 -#define STORE_SUBSCR_DICT 201 -#define STORE_SUBSCR_LIST_INT 202 -#define TO_BOOL_ALWAYS_TRUE 203 -#define TO_BOOL_BOOL 204 -#define TO_BOOL_INT 205 -#define TO_BOOL_LIST 206 -#define TO_BOOL_NONE 207 -#define TO_BOOL_STR 208 -#define UNPACK_SEQUENCE_LIST 209 -#define UNPACK_SEQUENCE_TUPLE 210 -#define UNPACK_SEQUENCE_TWO_TUPLE 211 +#define LOAD_GLOBAL_BUILTIN 190 +#define LOAD_GLOBAL_MODULE 191 +#define LOAD_SUPER_ATTR_ATTR 192 +#define LOAD_SUPER_ATTR_METHOD 193 +#define RESUME_CHECK 194 +#define SEND_GEN 195 +#define STORE_ATTR_INSTANCE_VALUE 196 +#define STORE_ATTR_SLOT 197 +#define STORE_ATTR_WITH_HINT 198 +#define STORE_SUBSCR_DICT 199 +#define STORE_SUBSCR_LIST_INT 200 +#define TO_BOOL_ALWAYS_TRUE 201 +#define TO_BOOL_BOOL 202 +#define TO_BOOL_INT 203 +#define TO_BOOL_LIST 204 +#define TO_BOOL_NONE 205 +#define TO_BOOL_STR 206 +#define UNPACK_SEQUENCE_LIST 207 +#define UNPACK_SEQUENCE_TUPLE 208 +#define UNPACK_SEQUENCE_TWO_TUPLE 209 #define INSTRUMENTED_END_FOR 234 #define INSTRUMENTED_POP_ITER 235 #define INSTRUMENTED_END_SEND 236 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index b9304ec3c03fdd..f168d169a32948 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -6,10 +6,6 @@ "RESUME": [ "RESUME_CHECK", ], - "LOAD_CONST": [ - "LOAD_CONST_MORTAL", - "LOAD_CONST_IMMORTAL", - ], "TO_BOOL": [ "TO_BOOL_ALWAYS_TRUE", "TO_BOOL_BOOL", @@ -186,28 +182,26 @@ 'LOAD_ATTR_PROPERTY': 187, 'LOAD_ATTR_SLOT': 188, 'LOAD_ATTR_WITH_HINT': 189, - 'LOAD_CONST_IMMORTAL': 190, - 'LOAD_CONST_MORTAL': 191, - 'LOAD_GLOBAL_BUILTIN': 192, - 'LOAD_GLOBAL_MODULE': 193, - 'LOAD_SUPER_ATTR_ATTR': 194, - 'LOAD_SUPER_ATTR_METHOD': 195, - 'RESUME_CHECK': 196, - 'SEND_GEN': 197, - 'STORE_ATTR_INSTANCE_VALUE': 198, - 'STORE_ATTR_SLOT': 199, - 'STORE_ATTR_WITH_HINT': 200, - 'STORE_SUBSCR_DICT': 201, - 'STORE_SUBSCR_LIST_INT': 202, - 'TO_BOOL_ALWAYS_TRUE': 203, - 'TO_BOOL_BOOL': 204, - 'TO_BOOL_INT': 205, - 'TO_BOOL_LIST': 206, - 'TO_BOOL_NONE': 207, - 'TO_BOOL_STR': 208, - 'UNPACK_SEQUENCE_LIST': 209, - 'UNPACK_SEQUENCE_TUPLE': 210, - 'UNPACK_SEQUENCE_TWO_TUPLE': 211, + 'LOAD_GLOBAL_BUILTIN': 190, + 'LOAD_GLOBAL_MODULE': 191, + 'LOAD_SUPER_ATTR_ATTR': 192, + 'LOAD_SUPER_ATTR_METHOD': 193, + 'RESUME_CHECK': 194, + 'SEND_GEN': 195, + 'STORE_ATTR_INSTANCE_VALUE': 196, + 'STORE_ATTR_SLOT': 197, + 'STORE_ATTR_WITH_HINT': 198, + 'STORE_SUBSCR_DICT': 199, + 'STORE_SUBSCR_LIST_INT': 200, + 'TO_BOOL_ALWAYS_TRUE': 201, + 'TO_BOOL_BOOL': 202, + 'TO_BOOL_INT': 203, + 'TO_BOOL_LIST': 204, + 'TO_BOOL_NONE': 205, + 'TO_BOOL_STR': 206, + 'UNPACK_SEQUENCE_LIST': 207, + 'UNPACK_SEQUENCE_TUPLE': 208, + 'UNPACK_SEQUENCE_TWO_TUPLE': 209, } opmap = { diff --git a/Lib/test/test_capi/test_object.py b/Lib/test/test_capi/test_object.py index 127862546b1bce..cd772120bdee2b 100644 --- a/Lib/test/test_capi/test_object.py +++ b/Lib/test/test_capi/test_object.py @@ -180,7 +180,7 @@ def test_is_uniquely_referenced(self): self.assertTrue(_testcapi.is_uniquely_referenced(object())) self.assertTrue(_testcapi.is_uniquely_referenced([])) # Immortals - self.assertFalse(_testcapi.is_uniquely_referenced("spanish inquisition")) + self.assertFalse(_testcapi.is_uniquely_referenced(())) self.assertFalse(_testcapi.is_uniquely_referenced(42)) # CRASHES is_uniquely_referenced(NULL) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index ae68c1dd75c641..547f6de5f5aada 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -902,7 +902,7 @@ def loop_test(): %3d RESUME_CHECK 0 %3d BUILD_LIST 0 - LOAD_CONST_MORTAL 2 ((1, 2, 3)) + LOAD_CONST 2 ((1, 2, 3)) LIST_EXTEND 1 LOAD_SMALL_INT 3 BINARY_OP 5 (*) @@ -918,7 +918,7 @@ def loop_test(): %3d L2: END_FOR POP_ITER - LOAD_CONST_IMMORTAL 1 (None) + LOAD_CONST 1 (None) RETURN_VALUE """ % (loop_test.__code__.co_firstlineno, loop_test.__code__.co_firstlineno + 1, @@ -1304,7 +1304,7 @@ def test_load_attr_specialize(self): load_attr_quicken = """\ 0 RESUME_CHECK 0 - 1 LOAD_CONST_IMMORTAL 0 ('a') + 1 LOAD_CONST 0 ('a') LOAD_ATTR_SLOT 0 (__class__) RETURN_VALUE """ diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a45aafc63fa697..30baa09048616c 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1810,20 +1810,6 @@ def compare_op_str(): self.assert_specialized(compare_op_str, "COMPARE_OP_STR") self.assert_no_opcode(compare_op_str, "COMPARE_OP") - @cpython_only - @requires_specialization_ft - def test_load_const(self): - def load_const(): - def unused(): pass - # Currently, the empty tuple is immortal, and the otherwise - # unused nested function's code object is mortal. This test will - # have to use different values if either of that changes. - return () - - load_const() - self.assert_specialized(load_const, "LOAD_CONST_IMMORTAL") - self.assert_specialized(load_const, "LOAD_CONST_MORTAL") - self.assert_no_opcode(load_const, "LOAD_CONST") @cpython_only @requires_specialization_ft diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 477a84b170ab98..65f411fa10506e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -295,55 +295,18 @@ dummy_func( value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); } - family(LOAD_CONST, 0) = { - LOAD_CONST_MORTAL, - LOAD_CONST_IMMORTAL, - }; - inst(LOAD_CONST, (-- value)) { - /* We can't do this in the bytecode compiler as - * marshalling can intern strings and make them immortal. */ - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); -#if ENABLE_SPECIALIZATION_FT -#ifdef Py_GIL_DISABLED - uint8_t expected = LOAD_CONST; - if (!_Py_atomic_compare_exchange_uint8( - &this_instr->op.code, &expected, - _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL)) { - // We might lose a race with instrumentation, which we don't care about. - assert(expected >= MIN_INSTRUMENTED_OPCODE); - } -#else - if (this_instr->op.code == LOAD_CONST) { - this_instr->op.code = _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL; - } -#endif -#endif - } - - inst(LOAD_CONST_MORTAL, (-- value)) { - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); - } - - inst(LOAD_CONST_IMMORTAL, (-- value)) { PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); } replicate(4) inst(LOAD_SMALL_INT, (-- value)) { assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); } replicate(8) inst(STORE_FAST, (value --)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; DEAD(value); @@ -355,10 +318,6 @@ dummy_func( }; inst(STORE_FAST_LOAD_FAST, (value1 -- value2)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -369,14 +328,6 @@ dummy_func( } inst(STORE_FAST_STORE_FAST, (value2, value1 --)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value2) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -942,7 +893,7 @@ dummy_func( PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); } op(_GUARD_NOS_TUPLE, (nos, unused -- nos, unused)) { @@ -5328,18 +5279,18 @@ dummy_func( } tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); } tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); } tier2 pure op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); } tier2 pure op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { @@ -5348,7 +5299,7 @@ dummy_func( (void)null; // Silence compiler warnings about unused variables DEAD(null); PyStackRef_CLOSE(callable); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); } tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c3da7a63b9ed86..cc521bd7923991 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -319,25 +319,11 @@ break; } - /* _LOAD_CONST is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ - - case _LOAD_CONST_MORTAL: { - _PyStackRef value; - oparg = CURRENT_OPARG(); - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_CONST_IMMORTAL: { + case _LOAD_CONST: { _PyStackRef value; oparg = CURRENT_OPARG(); PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -350,7 +336,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -363,7 +349,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -376,7 +362,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -389,7 +375,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -401,7 +387,7 @@ oparg = CURRENT_OPARG(); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -413,10 +399,6 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -432,10 +414,6 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -451,10 +429,6 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -470,10 +444,6 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -489,10 +459,6 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -508,10 +474,6 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -527,10 +489,6 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -546,10 +504,6 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -564,10 +518,6 @@ _PyStackRef value; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -1403,7 +1353,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); stack_pointer[0] = res; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7055,7 +7005,7 @@ case _LOAD_CONST_INLINE_BORROW: { _PyStackRef value; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7072,7 +7022,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); stack_pointer = _PyFrame_GetStackPointer(frame); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7096,7 +7046,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop1); stack_pointer = _PyFrame_GetStackPointer(frame); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7130,7 +7080,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cb9bfcbf2a75ac..1c98e1ce4fcf5c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -905,7 +905,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); } stack_pointer[0] = res; stack_pointer += 1; @@ -9002,63 +9002,9 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(LOAD_CONST); - PREDICTED_LOAD_CONST:; - _Py_CODEUNIT* const this_instr = next_instr - 1; - (void)this_instr; - _PyStackRef value; - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); - #if ENABLE_SPECIALIZATION_FT - #ifdef Py_GIL_DISABLED - uint8_t expected = LOAD_CONST; - if (!_Py_atomic_compare_exchange_uint8( - &this_instr->op.code, &expected, - _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL)) { - assert(expected >= MIN_INSTRUMENTED_OPCODE); - } - #else - if (this_instr->op.code == LOAD_CONST) { - this_instr->op.code = _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL; - } - #endif - #endif - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - DISPATCH(); - } - - TARGET(LOAD_CONST_IMMORTAL) { - #if Py_TAIL_CALL_INTERP - int opcode = LOAD_CONST_IMMORTAL; - (void)(opcode); - #endif - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_CONST_IMMORTAL); - static_assert(0 == 0, "incorrect cache size"); - _PyStackRef value; - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - DISPATCH(); - } - - TARGET(LOAD_CONST_MORTAL) { - #if Py_TAIL_CALL_INTERP - int opcode = LOAD_CONST_MORTAL; - (void)(opcode); - #endif - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_CONST_MORTAL); - static_assert(0 == 0, "incorrect cache size"); _PyStackRef value; PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -9593,7 +9539,7 @@ _PyStackRef value; assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -11240,10 +11186,6 @@ INSTRUCTION_STATS(STORE_FAST); _PyStackRef value; value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -11265,10 +11207,6 @@ _PyStackRef value1; _PyStackRef value2; value1 = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -11293,14 +11231,6 @@ _PyStackRef value1; value1 = stack_pointer[-1]; value2 = stack_pointer[-2]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value2) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 8af445d7d6ab32..1d6dcddab4b12d 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -190,8 +190,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_PROPERTY, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_CONST_IMMORTAL, - &&TARGET_LOAD_CONST_MORTAL, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_SUPER_ATTR_ATTR, @@ -234,6 +232,8 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_POP_ITER, &&TARGET_INSTRUMENTED_END_SEND, @@ -410,8 +410,6 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_ATTR_WITH_HINT(TAIL_CALL_PA Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_BUILD_CLASS(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_COMMON_CONSTANT(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST(TAIL_CALL_PARAMS); -Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_IMMORTAL(TAIL_CALL_PARAMS); -Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); @@ -649,8 +647,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [LOAD_BUILD_CLASS] = _TAIL_CALL_LOAD_BUILD_CLASS, [LOAD_COMMON_CONSTANT] = _TAIL_CALL_LOAD_COMMON_CONSTANT, [LOAD_CONST] = _TAIL_CALL_LOAD_CONST, - [LOAD_CONST_IMMORTAL] = _TAIL_CALL_LOAD_CONST_IMMORTAL, - [LOAD_CONST_MORTAL] = _TAIL_CALL_LOAD_CONST_MORTAL, [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF, [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, @@ -740,6 +736,8 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, + [210] = _TAIL_CALL_UNKNOWN_OPCODE, + [211] = _TAIL_CALL_UNKNOWN_OPCODE, [212] = _TAIL_CALL_UNKNOWN_OPCODE, [213] = _TAIL_CALL_UNKNOWN_OPCODE, [214] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9811e9f348c2a0..3799f25880d34b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -510,20 +510,6 @@ dummy_func(void) { } op(_LOAD_CONST, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - } - - op(_LOAD_CONST_MORTAL, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - } - - op(_LOAD_CONST_IMMORTAL, (-- value)) { PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 6e19319fbaef4a..a2c68457d8ef4e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -66,21 +66,7 @@ break; } - /* _LOAD_CONST is not a viable micro-op for tier 2 */ - - case _LOAD_CONST_MORTAL: { - JitOptSymbol *value; - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_CONST_IMMORTAL: { + case _LOAD_CONST: { JitOptSymbol *value; PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2b3a90c7db9b44..825c72eaa1634a 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -585,7 +585,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", "PyStackRef_False", - "PyStackRef_FromPyObjectImmortal", + "PyStackRef_FromPyObjectBorrow", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", "PyStackRef_IsExactly", From dd7f1130570d50461b2a0f81ab01c55b9ce93700 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 20 May 2025 11:31:37 -0400 Subject: [PATCH 235/989] gh-115999: Add PyCodeObject.co_tlbc to the debug offsets (#134286) Signed-off-by: Pablo Galindo --- Include/internal/pycore_debug_offsets.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 59d2c9d5377953..1a265c59ff8c08 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -52,9 +52,13 @@ extern "C" { #ifdef Py_GIL_DISABLED # define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) # define _Py_Debug_Free_Threaded 1 +# define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) +# define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 +# define _Py_Debug_code_object_co_tlbc 0 +# define _Py_Debug_interpreter_frame_tlbc_index 0 #endif @@ -109,6 +113,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplus; uint64_t owner; uint64_t stackpointer; + uint64_t tlbc_index; } interpreter_frame; // Code object offset; @@ -123,6 +128,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplusnames; uint64_t localspluskinds; uint64_t co_code_adaptive; + uint64_t co_tlbc; } code_object; // PyObject offset; @@ -265,6 +271,7 @@ typedef struct _Py_DebugOffsets { .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ .owner = offsetof(_PyInterpreterFrame, owner), \ .stackpointer = offsetof(_PyInterpreterFrame, stackpointer), \ + .tlbc_index = _Py_Debug_interpreter_frame_tlbc_index, \ }, \ .code_object = { \ .size = sizeof(PyCodeObject), \ @@ -277,6 +284,7 @@ typedef struct _Py_DebugOffsets { .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + .co_tlbc = _Py_Debug_code_object_co_tlbc, \ }, \ .pyobject = { \ .size = sizeof(PyObject), \ From 317c49622397222b7c7fb49837e6b1fd7e82a80d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 20 May 2025 11:39:56 -0400 Subject: [PATCH 236/989] gh-129748: Update mimalloc to use atomic store for mi_block_set_nextx (#134238) --- Include/internal/mimalloc/mimalloc/internal.h | 8 ++++---- Include/internal/mimalloc/mimalloc/types.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index d97f51b8eefbe5..71b7ea702d6c5e 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -634,10 +634,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST - next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed(&block->next), keys); #else MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)block->next; + next = (mi_block_t*)mi_atomic_load_relaxed(&block->next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; @@ -646,10 +646,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST - block->next = mi_ptr_encode(null, next, keys); + mi_atomic_store_relaxed(&block->next, mi_ptr_encode(null, next, keys)); #else MI_UNUSED(keys); MI_UNUSED(null); - block->next = (mi_encoded_t)next; + mi_atomic_store_relaxed(&block->next, (mi_encoded_t)next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); } diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 354839ba955b36..4f77bd7bc525db 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -235,7 +235,7 @@ typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { - mi_encoded_t next; + _Atomic(mi_encoded_t) next; } mi_block_t; From ec39fd2c20323ee9814a1137b1a0819e92efae4e Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 20 May 2025 21:11:47 +0530 Subject: [PATCH 237/989] gh-133980: use atomic store in `PyObject_GenericSetDict` (#133988) --- Lib/test/test_free_threading/test_dict.py | 16 ++++++++++++++++ Objects/object.c | 8 +++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index 476cc3178d843f..5d5d4e226caa40 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -228,6 +228,22 @@ def reader_func(): self.assertEqual(count, 0) + def test_racing_object_get_set_dict(self): + e = Exception() + + def writer(): + for i in range(10000): + e.__dict__ = {1:2} + + def reader(): + for i in range(10000): + e.__dict__ + + t1 = Thread(target=writer) + t2 = Thread(target=reader) + + with threading_helper.start_threads([t1, t2]): + pass if __name__ == "__main__": unittest.main() diff --git a/Objects/object.c b/Objects/object.c index 723b0427e69251..af1aa217f75462 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1931,7 +1931,13 @@ PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) return -1; } Py_BEGIN_CRITICAL_SECTION(obj); - Py_XSETREF(*dictptr, Py_NewRef(value)); + PyObject *olddict = *dictptr; + FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); +#ifdef Py_GIL_DISABLED + _PyObject_XDecRefDelayed(olddict); +#else + Py_XDECREF(olddict); +#endif Py_END_CRITICAL_SECTION(); return 0; } From 86397cf65d024a39ae85b81d7f611ad4864a78b4 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 20 May 2025 17:18:53 +0100 Subject: [PATCH 238/989] gh-76075: Correct `datetime.timestamp` documentation (#131202) * Clean up timestamp docs * Update datetime.rst * Suggestion --- Doc/library/datetime.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 3470f42a6c622d..16ed3215bc2c1a 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1502,11 +1502,11 @@ Instance methods: returned by :func:`time.time`. Naive :class:`.datetime` instances are assumed to represent local - time and this method relies on the platform C :c:func:`mktime` - function to perform the conversion. Since :class:`.datetime` - supports wider range of values than :c:func:`mktime` on many - platforms, this method may raise :exc:`OverflowError` or :exc:`OSError` - for times far in the past or far in the future. + time and this method relies on platform C functions to perform + the conversion. Since :class:`.datetime` supports a wider range of + values than the platform C functions on many platforms, this + method may raise :exc:`OverflowError` or :exc:`OSError` for times + far in the past or far in the future. For aware :class:`.datetime` instances, the return value is computed as:: @@ -1519,6 +1519,10 @@ Instance methods: The :meth:`timestamp` method uses the :attr:`.fold` attribute to disambiguate the times during a repeated interval. + .. versionchanged:: 3.6 + This method no longer relies on the platform C :c:func:`mktime` + function to perform conversions. + .. note:: There is no method to obtain the POSIX timestamp directly from a From 6b735023132a4ac9dc5b849d982104eeb1e8bdad Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Tue, 20 May 2025 19:20:25 +0300 Subject: [PATCH 239/989] gh-132542: Set native thread ID after fork (GH-132701) --- Lib/test/test_threading.py | 28 +++++++++++++++++++ Lib/threading.py | 2 ++ ...-04-19-17-16-46.gh-issue-132542.7T_TY_.rst | 2 ++ 3 files changed, 32 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-17-16-46.gh-issue-132542.7T_TY_.rst diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index b6e2d419019aa1..a9eec139bec8bf 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1352,6 +1352,34 @@ def do_flush(*args, **kwargs): ''') assert_python_ok("-c", script) + @skip_unless_reliable_fork + def test_native_id_after_fork(self): + script = """if True: + import threading + import os + from test import support + + parent_thread_native_id = threading.current_thread().native_id + print(parent_thread_native_id, flush=True) + assert parent_thread_native_id == threading.get_native_id() + childpid = os.fork() + if childpid == 0: + print(threading.current_thread().native_id, flush=True) + assert threading.current_thread().native_id == threading.get_native_id() + else: + try: + assert parent_thread_native_id == threading.current_thread().native_id + assert parent_thread_native_id == threading.get_native_id() + finally: + support.wait_process(childpid, exitcode=0) + """ + rc, out, err = assert_python_ok('-c', script) + self.assertEqual(rc, 0) + self.assertEqual(err, b"") + native_ids = out.strip().splitlines() + self.assertEqual(len(native_ids), 2) + self.assertNotEqual(native_ids[0], native_ids[1]) + class ThreadJoinOnShutdown(BaseTestCase): def _run_and_join(self, script): diff --git a/Lib/threading.py b/Lib/threading.py index 9feada3b8bb15b..fb9ef75d28111b 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -944,6 +944,8 @@ def _after_fork(self, new_ident=None): # This thread is alive. self._ident = new_ident assert self._os_thread_handle.ident == new_ident + if _HAVE_THREAD_NATIVE_ID: + self._set_native_id() else: # Otherwise, the thread is dead, Jim. _PyThread_AfterFork() # already marked our handle done. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-17-16-46.gh-issue-132542.7T_TY_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-17-16-46.gh-issue-132542.7T_TY_.rst new file mode 100644 index 00000000000000..c69ce5efdedcca --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-17-16-46.gh-issue-132542.7T_TY_.rst @@ -0,0 +1,2 @@ +Update :attr:`Thread.native_id ` after +:manpage:`fork(2)` to ensure accuracy. Patch by Noam Cohen. From 7ad90463df16e1938a3b6725b0f02af34cea372e Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 20 May 2025 12:32:26 -0400 Subject: [PATCH 240/989] GH-133779: Fix finding pyconfig.h on Windows JIT builds (GH-134349) --- PCbuild/regen.targets | 3 +-- Tools/jit/_targets.py | 16 ++++++++-------- Tools/jit/build.py | 21 +++++++++++++++++---- configure | 2 +- configure.ac | 2 +- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index 21de614e71ddce..742597f5cb5ebd 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -125,8 +125,7 @@ x86_64-pc-windows-msvc $(JITArgs) --debug
- +
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 6ceb4404e74ce7..d0a1c081ffecc2 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -47,6 +47,7 @@ class _Target(typing.Generic[_S, _R]): debug: bool = False verbose: bool = False known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: if re.fullmatch(r"aarch64-.*", self.triple): @@ -57,13 +58,13 @@ def _get_nop(self) -> bytes: raise ValueError(f"NOP not defined for {self.triple}") return nop - def _compute_digest(self, out: pathlib.Path) -> str: + def _compute_digest(self) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) - hasher.update((out / "pyconfig.h").read_bytes()) + hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: hasher.update(pathlib.Path(dirpath, filename).read_bytes()) @@ -125,7 +126,7 @@ async def _compile( f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", "-D_Py_JIT", - "-I.", + f"-I{self.pyconfig_dir}", f"-I{CPYTHON / 'Include'}", f"-I{CPYTHON / 'Include' / 'internal'}", f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", @@ -193,20 +194,19 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: def build( self, - out: pathlib.Path, *, comment: str = "", force: bool = False, - stencils_h: str = "jit_stencils.h", + jit_stencils: pathlib.Path, ) -> None: """Build jit_stencils.h in the given directory.""" + jit_stencils.parent.mkdir(parents=True, exist_ok=True) if not self.stable: warning = f"JIT support for {self.triple} is still experimental!" request = "Please report any issues you encounter.".center(len(warning)) outline = "=" * len(warning) print("\n".join(["", outline, warning, request, outline, ""])) - digest = f"// {self._compute_digest(out)}\n" - jit_stencils = out / stencils_h + digest = f"// {self._compute_digest()}\n" if ( not force and jit_stencils.exists() @@ -214,7 +214,7 @@ def build( ): return stencil_groups = ASYNCIO_RUNNER.run(self._build_stencils()) - jit_stencils_new = out / "jit_stencils.h.new" + jit_stencils_new = jit_stencils.parent / "jit_stencils.h.new" try: with jit_stencils_new.open("w") as file: file.write(digest) diff --git a/Tools/jit/build.py b/Tools/jit/build.py index 49b08f477dbed7..1afd0c76bad1d2 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -8,7 +8,6 @@ import _targets if __name__ == "__main__": - out = pathlib.Path.cwd().resolve() comment = f"$ {shlex.join([pathlib.Path(sys.executable).name] + sys.argv)}" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -23,6 +22,20 @@ parser.add_argument( "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) + parser.add_argument( + "-o", + "--output-dir", + help="where to output generated files", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) + parser.add_argument( + "-p", + "--pyconfig-dir", + help="where to find pyconfig.h", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) parser.add_argument( "-v", "--verbose", action="store_true", help="echo commands as they are run" ) @@ -31,13 +44,13 @@ target.debug = args.debug target.force = args.force target.verbose = args.verbose + target.pyconfig_dir = args.pyconfig_dir target.build( - out, comment=comment, - stencils_h=f"jit_stencils-{target.triple}.h", force=args.force, + jit_stencils=args.output_dir / f"jit_stencils-{target.triple}.h", ) - jit_stencils_h = out / "jit_stencils.h" + jit_stencils_h = args.output_dir / "jit_stencils.h" lines = [f"// {comment}\n"] guard = "#if" for target in args.target: diff --git a/configure b/configure index 2649a800f91a48..abdd28fcabf769 100755 --- a/configure +++ b/configure @@ -10863,7 +10863,7 @@ then : else case e in #( e) as_fn_append CFLAGS_NODIST " $jit_flags" - REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host}" + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir ." JIT_STENCILS_H="jit_stencils.h" if test "x$Py_DEBUG" = xtrue then : diff --git a/configure.ac b/configure.ac index 5525a5b0ed5240..8d939f075058bf 100644 --- a/configure.ac +++ b/configure.ac @@ -2776,7 +2776,7 @@ AS_VAR_IF([jit_flags], [], [AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"]) AS_VAR_SET([REGEN_JIT_COMMAND], - ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host}"]) + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir ."]) AS_VAR_SET([JIT_STENCILS_H], ["jit_stencils.h"]) AS_VAR_IF([Py_DEBUG], [true], From 99b580857f32a7cd9e25e1b453755757d3a4060b Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 20 May 2025 17:39:58 +0100 Subject: [PATCH 241/989] gh-122781: Allow empty offset for `%z` in `strptime` (#132922) * commit * Move tests --- Lib/_strptime.py | 45 ++++++++++--------- Lib/test/datetimetester.py | 11 +++++ ...-04-25-11-48-00.gh-issue-122781.ajsdns.rst | 2 + 3 files changed, 36 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-25-11-48-00.gh-issue-122781.ajsdns.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index aa63933a49d16d..ae67949626d460 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -302,7 +302,7 @@ def __init__(self, locale_time=None): # W is set below by using 'U' 'y': r"(?P\d\d)", 'Y': r"(?P\d\d\d\d)", - 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", + 'z': r"(?P([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), @@ -548,27 +548,28 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): iso_week = int(found_dict['V']) elif group_key == 'z': z = found_dict['z'] - if z == 'Z': - gmtoff = 0 - else: - if z[3] == ':': - z = z[:3] + z[4:] - if len(z) > 5: - if z[5] != ':': - msg = f"Inconsistent use of : in {found_dict['z']}" - raise ValueError(msg) - z = z[:5] + z[6:] - hours = int(z[1:3]) - minutes = int(z[3:5]) - seconds = int(z[5:7] or 0) - gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds - gmtoff_remainder = z[8:] - # Pad to always return microseconds. - gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder)) - gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding) - if z.startswith("-"): - gmtoff = -gmtoff - gmtoff_fraction = -gmtoff_fraction + if z: + if z == 'Z': + gmtoff = 0 + else: + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + msg = f"Inconsistent use of : in {found_dict['z']}" + raise ValueError(msg) + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds + gmtoff_remainder = z[8:] + # Pad to always return microseconds. + gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder)) + gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding) + if z.startswith("-"): + gmtoff = -gmtoff + gmtoff_fraction = -gmtoff_fraction elif group_key == 'Z': # Since -1 is default value only need to worry about setting tz if # it can be something other than -1. diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 345698cfb5f1a4..df5e45f5f20e32 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2972,6 +2972,17 @@ def test_strptime_leap_year(self): with self._assertNotWarns(DeprecationWarning): self.theclass.strptime('02-29,2024', '%m-%d,%Y') + def test_strptime_z_empty(self): + for directive in ('z',): + string = '2025-04-25 11:42:47' + format = f'%Y-%m-%d %H:%M:%S%{directive}' + target = self.theclass(2025, 4, 25, 11, 42, 47) + with self.subTest(string=string, + format=format, + target=target): + result = self.theclass.strptime(string, format) + self.assertEqual(result, target) + def test_more_timetuple(self): # This tests fields beyond those tested by the TestDate.test_timetuple. t = self.theclass(2004, 12, 31, 6, 22, 33) diff --git a/Misc/NEWS.d/next/Library/2025-04-25-11-48-00.gh-issue-122781.ajsdns.rst b/Misc/NEWS.d/next/Library/2025-04-25-11-48-00.gh-issue-122781.ajsdns.rst new file mode 100644 index 00000000000000..5a9a0cdf7986dc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-25-11-48-00.gh-issue-122781.ajsdns.rst @@ -0,0 +1,2 @@ +Fix ``%z`` directive in :func:`datetime.datetime.strptime` to allow for no provided +offset as was documented. From 0584533dc70b4efdb65fd53fbb5beddbc7a302a8 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 20 May 2025 22:11:58 +0530 Subject: [PATCH 242/989] gh-128002: add what's new docs for `asyncio` (#134324) --- Doc/whatsnew/3.14.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b192615e173f11..604f3365ebffcc 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1997,11 +1997,19 @@ Optimizations asyncio ------- -* :mod:`asyncio` now uses double linked list implementation for native tasks - which speeds up execution by 10% on standard pyperformance benchmarks and - reduces memory usage. +* :mod:`asyncio` has a new per-thread double linked list implementation internally for + :class:`native tasks ` which speeds up execution by 10-20% on standard + pyperformance benchmarks and reduces memory usage. + This enables external introspection tools such as + :ref:`python -m asyncio pstree ` + to introspect the call graph of asyncio tasks running in all threads. (Contributed by Kumar Aditya in :gh:`107803`.) +* :mod:`asyncio` has first class support for :term:`free-threading builds `. + This enables parallel execution of multiple event loops across different threads and scales + linearly with the number of threads. + (Contributed by Kumar Aditya in :gh:`128002`.) + * :mod:`asyncio` has new utility functions for introspecting and printing the program's call graph: :func:`asyncio.capture_call_graph` and :func:`asyncio.print_call_graph`. @@ -2083,7 +2091,6 @@ Deprecated * :class:`asyncio.WindowsProactorEventLoopPolicy` * :func:`asyncio.get_event_loop_policy` * :func:`asyncio.set_event_loop_policy` - * :func:`asyncio.set_event_loop` Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with *loop_factory* to use the desired event loop implementation. From b430e92dd80105e97b945a78a48bce13564bf843 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 20 May 2025 22:27:23 +0530 Subject: [PATCH 243/989] gh-127945: Update What's New in Python 3.14 for free-threaded ctypes (#134332) --- Doc/whatsnew/3.14.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 604f3365ebffcc..88e52015bdc2b1 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1200,6 +1200,9 @@ ctypes making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`132168`.) +* :mod:`ctypes` now supports :term:`free-threading builds `. + (Contributed by Kumar Aditya and Peter Bierma in :gh:`127945`.) + curses ------ From 36eb711d2f26849214774a017fe8c8a5be3eec30 Mon Sep 17 00:00:00 2001 From: Alex Kautz Date: Tue, 20 May 2025 14:18:58 -0400 Subject: [PATCH 244/989] gh-85045: clarified that the underlying buffer of a TextIOBase can be a RawIOBase (GH-134372) Added a clarification that the underlying binary buffer of a TextIOBase can be a BufferedIOBase OR a RawIOBase --- Doc/library/io.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 3aa2f35f05eba0..08c76da3d8c00a 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -894,9 +894,10 @@ Text I/O .. attribute:: buffer - The underlying binary buffer (a :class:`BufferedIOBase` instance) that - :class:`TextIOBase` deals with. This is not part of the - :class:`TextIOBase` API and may not exist in some implementations. + The underlying binary buffer (a :class:`BufferedIOBase` + or :class:`RawIOBase` instance) that :class:`TextIOBase` deals with. + This is not part of the :class:`TextIOBase` API and may not exist + in some implementations. .. method:: detach() From 3b7888bf3d43b903f0a7ebd16f39d8bb61dfbb9e Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 20 May 2025 14:22:49 -0400 Subject: [PATCH 245/989] gh-106213: Shorten Emscripten wasm-gc trampoline by a little (#133984) Using the if instruction results in slightly shorter trampoline code. --- Python/emscripten_trampoline.c | 58 ++++++++++++---------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index a7bb685bf3dc6d..cc5047d6bda224 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -35,7 +35,7 @@ EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { // (type $type1 (func (param i32) (result i32))) // (type $type2 (func (param i32 i32) (result i32))) // (type $type3 (func (param i32 i32 i32) (result i32))) -// (type $blocktype (func (param i32) (result))) +// (type $blocktype (func (param) (result))) // (table $funcs (import "e" "t") 0 funcref) // (export "f" (func $f)) // (func $f (param $fptr i32) (result i32) @@ -44,36 +44,28 @@ EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { // table.get $funcs // local.tee $fref // ref.test $type3 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 3 // return -// ) +// end // local.get $fref // ref.test $type2 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 2 // return -// ) +// end // local.get $fref // ref.test $type1 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 1 // return -// ) +// end // local.get $fref // ref.test $type0 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 0 // return -// ) +// end // i32.const -1 // ) // ) @@ -88,13 +80,13 @@ function getPyEMCountArgsPtr() { const code = new Uint8Array([ 0x00, 0x61, 0x73, 0x6d, // \0asm magic number 0x01, 0x00, 0x00, 0x00, // version 1 - 0x01, 0x1b, // Type section, body is 0x1b bytes + 0x01, 0x1a, // Type section, body is 0x1a bytes 0x05, // 6 entries - 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) - 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) - 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) - 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) - 0x60, 0x01, 0x7f, 0x00, // (type $blocktype (func (param i32) (result))) + 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) + 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) + 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) + 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) + 0x60, 0x00, 0x00, // (type $blocktype (func (param) (result))) 0x02, 0x09, // Import section, 0x9 byte body 0x01, // 1 import (table $funcs (import "e" "t") 0 funcref) 0x01, 0x65, // "e" @@ -110,44 +102,36 @@ function getPyEMCountArgsPtr() { 0x00, // a function 0x00, // at index 0 - 0x0a, 0x44, // Code section, - 0x01, 0x42, // one entry of length 50 + 0x0a, 56, // Code section, + 0x01, 54, // one entry of length 54 0x01, 0x01, 0x70, // one local of type funcref // Body of the function 0x20, 0x00, // local.get $fptr 0x25, 0x00, // table.get $funcs 0x22, 0x01, // local.tee $fref 0xfb, 0x14, 0x03, // ref.test $type3 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x03, // i32.const 3 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x02, // ref.test $type2 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x02, // i32.const 2 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x01, // ref.test $type1 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x01, // i32.const 1 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x00, // ref.test $type0 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x00, // i32.const 0 0x0f, // return 0x0b, // end block From 91e6a58e2d6fd23e886135457e28dfa980ec49ed Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 20 May 2025 14:41:14 -0400 Subject: [PATCH 246/989] gh-127146: xfail more Emscripten stack overflows (#134358) Adds some additional test xfails for Emscripten stack overflows. Also corrects a test skip for test_io. --- Lib/test/test_builtin.py | 1 + Lib/test/test_capi/test_misc.py | 2 ++ Lib/test/test_descr.py | 1 + Lib/test/test_exceptions.py | 1 + Lib/test/test_io.py | 2 +- 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 31597a320d418a..f1f877c47a1e7a 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1120,6 +1120,7 @@ def test_filter_pickle(self): self.check_iter_pickle(f1, list(f2), proto) @support.skip_wasi_stack_overflow() + @support.skip_emscripten_stack_overflow() @support.requires_resource('cpu') def test_filter_dealloc(self): # Tests recursive deallocation of nested filter objects using the diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index a597f23a992e7b..f74694a7a745a4 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -412,10 +412,12 @@ def test_trashcan_subclass(self): L = MyList((L,)) @support.requires_resource('cpu') + @support.skip_emscripten_stack_overflow() def test_trashcan_python_class1(self): self.do_test_trashcan_python_class(list) @support.requires_resource('cpu') + @support.skip_emscripten_stack_overflow() def test_trashcan_python_class2(self): from _testcapi import MyList self.do_test_trashcan_python_class(MyList) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 76937432a43037..14026531e22333 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4523,6 +4523,7 @@ class Oops(object): del o @support.skip_wasi_stack_overflow() + @support.skip_emscripten_stack_overflow() @support.requires_resource('cpu') def test_wrapper_segfault(self): # SF 927248: deeply nested wrappers could cause stack overflow diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index d177e3dc0f5007..cfd4a3e87695f5 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1429,6 +1429,7 @@ def g(): self.assertIn("maximum recursion depth exceeded", str(exc)) @support.skip_wasi_stack_overflow() + @support.skip_emscripten_stack_overflow() @cpython_only @support.requires_resource('cpu') def test_trashcan_recursion(self): diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 5a8f1949baaa98..90680c6d47ab41 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -572,7 +572,7 @@ def do_test(test, obj, abilities): for [test, abilities] in tests: with self.subTest(test): if test == pipe_writer and not threading_helper.can_start_thread: - skipTest() + self.skipTest("Need threads") with test() as obj: do_test(test, obj, abilities) From c7f8e706e116d7986da40805cc4a85333acde1c9 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Tue, 20 May 2025 16:30:00 -0300 Subject: [PATCH 247/989] gh-90117: handle dict and mapping views in pprint (#30135) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Teach pprint about dict views with PrettyPrinter._pprint_dict_view and ._pprint_dict_items_view. * Use _private names for _dict_*_view attributes of PrettyPrinter. * Use explicit 'items' keyword when calling _pprint_dict_view from _pprint_dict_items_view. * 📜🤖 Added by blurb_it. * Improve tests * Add tests for collections.abc.[Keys|Items|Mapping|Values]View support in pprint. * Add support for collections.abc.[Keys|Items|Mapping|Values]View in pprint. * Split _pprint_dict_view into _pprint_abc_view, so pretty-printing normal dict views and ABC views is handled in two simple methods. * Simplify redundant code. * Add collections.abc views to some existing pprint tests. * Test that views from collection.UserDict are correctly formatted by pprint. * Handle recursive dict and ABC views. * Test that subclasses of ABC views work in pprint. * Test dict views coming from collections.Counter. * Test ABC views coming from collections.ChainMap. * Test odict views coming from collections.OrderedDict. * Rename _pprint_abc_view to _pprint_mapping_abc_view. * Add pprint test for mapping ABC views where ._mapping has a custom __repr__ and fix ChainMap test. * When a mapping ABC view has a ._mapping that defines a custom __repr__, dispatch pretty-printing it by that __repr__. * Add tests for ABC mapping views subclasses that don't replace __repr__, also handling those that delete ._mapping on instances. * Simplify the pretty printing of ABC mapping views. * Add a test for depth handling when pretty printing dict views. * Fix checking whether the view type is a subclass of an items view, add a test. * Move construction of the views __repr__ set out of _safe_repr. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Éric Co-authored-by: Oleg Iarygin Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Gregory P. Smith --- Lib/pprint.py | 79 ++++ Lib/test/test_pprint.py | 346 +++++++++++++++++- .../2021-12-18-12-46-20.bpo-45959.vPlr3P.rst | 1 + 3 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-12-18-12-46-20.bpo-45959.vPlr3P.rst diff --git a/Lib/pprint.py b/Lib/pprint.py index dc0953cec67a58..1e611481b51ac0 100644 --- a/Lib/pprint.py +++ b/Lib/pprint.py @@ -248,6 +248,49 @@ def _pprint_ordered_dict(self, object, stream, indent, allowance, context, level _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict + def _pprint_dict_view(self, object, stream, indent, allowance, context, level): + """Pretty print dict views (keys, values, items).""" + if isinstance(object, self._dict_items_view): + key = _safe_tuple + else: + key = _safe_key + write = stream.write + write(object.__class__.__name__ + '([') + if self._indent_per_level > 1: + write((self._indent_per_level - 1) * ' ') + length = len(object) + if length: + if self._sort_dicts: + entries = sorted(object, key=key) + else: + entries = object + self._format_items(entries, stream, indent, allowance + 1, + context, level) + write('])') + + def _pprint_mapping_abc_view(self, object, stream, indent, allowance, context, level): + """Pretty print mapping views from collections.abc.""" + write = stream.write + write(object.__class__.__name__ + '(') + # Dispatch formatting to the view's _mapping + self._format(object._mapping, stream, indent, allowance, context, level) + write(')') + + _dict_keys_view = type({}.keys()) + _dispatch[_dict_keys_view.__repr__] = _pprint_dict_view + + _dict_values_view = type({}.values()) + _dispatch[_dict_values_view.__repr__] = _pprint_dict_view + + _dict_items_view = type({}.items()) + _dispatch[_dict_items_view.__repr__] = _pprint_dict_view + + _dispatch[_collections.abc.MappingView.__repr__] = _pprint_mapping_abc_view + + _view_reprs = {cls.__repr__ for cls in + (_dict_keys_view, _dict_values_view, _dict_items_view, + _collections.abc.MappingView)} + def _pprint_list(self, object, stream, indent, allowance, context, level): stream.write('[') self._format_items(object, stream, indent, allowance + 1, @@ -610,6 +653,42 @@ def _safe_repr(self, object, context, maxlevels, level): del context[objid] return "{%s}" % ", ".join(components), readable, recursive + if issubclass(typ, _collections.abc.MappingView) and r in self._view_reprs: + objid = id(object) + if maxlevels and level >= maxlevels: + return "{...}", False, objid in context + if objid in context: + return _recursion(object), False, True + key = _safe_key + if issubclass(typ, (self._dict_items_view, _collections.abc.ItemsView)): + key = _safe_tuple + if hasattr(object, "_mapping"): + # Dispatch formatting to the view's _mapping + mapping_repr, readable, recursive = self.format( + object._mapping, context, maxlevels, level) + return (typ.__name__ + '(%s)' % mapping_repr), readable, recursive + elif hasattr(typ, "_mapping"): + # We have a view that somehow has lost its type's _mapping, raise + # an error by calling repr() instead of failing cryptically later + return repr(object), True, False + if self._sort_dicts: + object = sorted(object, key=key) + context[objid] = 1 + readable = True + recursive = False + components = [] + append = components.append + level += 1 + for val in object: + vrepr, vreadable, vrecur = self.format( + val, context, maxlevels, level) + append(vrepr) + readable = readable and vreadable + if vrecur: + recursive = True + del context[objid] + return typ.__name__ + '([%s])' % ", ".join(components), readable, recursive + if (issubclass(typ, list) and r is list.__repr__) or \ (issubclass(typ, tuple) and r is tuple.__repr__): if issubclass(typ, list): diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index f68996f72b15b5..0c84d3d3bfd17a 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -10,6 +10,7 @@ import re import types import unittest +from collections.abc import ItemsView, KeysView, Mapping, MappingView, ValuesView from test.support import cpython_only from test.support.import_helper import ensure_lazy_imports @@ -70,6 +71,14 @@ class dict_custom_repr(dict): def __repr__(self): return '*'*len(dict.__repr__(self)) +class mappingview_custom_repr(MappingView): + def __repr__(self): + return '*'*len(MappingView.__repr__(self)) + +class keysview_custom_repr(KeysView): + def __repr__(self): + return '*'*len(KeysView.__repr__(self)) + @dataclasses.dataclass class dataclass1: field1: str @@ -180,10 +189,17 @@ def test_knotted(self): # Messy dict. self.d = {} self.d[0] = self.d[1] = self.d[2] = self.d + self.e = {} + self.v = ValuesView(self.e) + self.m = MappingView(self.e) + self.dv = self.e.values() + self.e["v"] = self.v + self.e["m"] = self.m + self.e["dv"] = self.dv pp = pprint.PrettyPrinter() - for icky in self.a, self.b, self.d, (self.d, self.d): + for icky in self.a, self.b, self.d, (self.d, self.d), self.e, self.v, self.m, self.dv: self.assertTrue(pprint.isrecursive(icky), "expected isrecursive") self.assertFalse(pprint.isreadable(icky), "expected not isreadable") self.assertTrue(pp.isrecursive(icky), "expected isrecursive") @@ -191,10 +207,11 @@ def test_knotted(self): # Break the cycles. self.d.clear() + self.e.clear() del self.a[:] del self.b[:] - for safe in self.a, self.b, self.d, (self.d, self.d): + for safe in self.a, self.b, self.d, (self.d, self.d), self.e, self.v, self.m, self.dv: # module-level convenience functions self.assertFalse(pprint.isrecursive(safe), "expected not isrecursive for %r" % (safe,)) @@ -237,6 +254,8 @@ def test_same_as_repr(self): set(), set2(), set3(), frozenset(), frozenset2(), frozenset3(), {}, dict2(), dict3(), + {}.keys(), {}.values(), {}.items(), + MappingView({}), KeysView({}), ItemsView({}), ValuesView({}), self.assertTrue, pprint, -6, -6, -6-6j, -1.5, "x", b"x", bytearray(b"x"), (3,), [3], {3: 6}, @@ -246,6 +265,9 @@ def test_same_as_repr(self): set({7}), set2({7}), set3({7}), frozenset({8}), frozenset2({8}), frozenset3({8}), dict2({5: 6}), dict3({5: 6}), + {5: 6}.keys(), {5: 6}.values(), {5: 6}.items(), + MappingView({5: 6}), KeysView({5: 6}), + ItemsView({5: 6}), ValuesView({5: 6}), range(10, -11, -1), True, False, None, ..., ): @@ -275,6 +297,12 @@ def test_container_repr_override_called(self): dict_custom_repr(), dict_custom_repr({5: 6}), dict_custom_repr(zip(range(N),range(N))), + mappingview_custom_repr({}), + mappingview_custom_repr({5: 6}), + mappingview_custom_repr(dict(zip(range(N),range(N)))), + keysview_custom_repr({}), + keysview_custom_repr({5: 6}), + keysview_custom_repr(dict(zip(range(N),range(N)))), ): native = repr(cont) expected = '*' * len(native) @@ -302,6 +330,56 @@ def test_basic_line_wrap(self): for type in [dict, dict2]: self.assertEqual(pprint.pformat(type(o)), exp) + o = range(100) + exp = 'dict_keys([%s])' % ',\n '.join(map(str, o)) + keys = dict.fromkeys(o).keys() + self.assertEqual(pprint.pformat(keys), exp) + + o = range(100) + exp = 'dict_values([%s])' % ',\n '.join(map(str, o)) + values = {v: v for v in o}.values() + self.assertEqual(pprint.pformat(values), exp) + + o = range(100) + exp = 'dict_items([%s])' % ',\n '.join("(%s, %s)" % (i, i) for i in o) + items = {v: v for v in o}.items() + self.assertEqual(pprint.pformat(items), exp) + + o = range(100) + exp = 'odict_keys([%s])' % ',\n '.join(map(str, o)) + keys = collections.OrderedDict.fromkeys(o).keys() + self.assertEqual(pprint.pformat(keys), exp) + + o = range(100) + exp = 'odict_values([%s])' % ',\n '.join(map(str, o)) + values = collections.OrderedDict({v: v for v in o}).values() + self.assertEqual(pprint.pformat(values), exp) + + o = range(100) + exp = 'odict_items([%s])' % ',\n '.join("(%s, %s)" % (i, i) for i in o) + items = collections.OrderedDict({v: v for v in o}).items() + self.assertEqual(pprint.pformat(items), exp) + + o = range(100) + exp = 'KeysView({%s})' % (': None,\n '.join(map(str, o)) + ': None') + keys_view = KeysView(dict.fromkeys(o)) + self.assertEqual(pprint.pformat(keys_view), exp) + + o = range(100) + exp = 'ItemsView({%s})' % (': None,\n '.join(map(str, o)) + ': None') + items_view = ItemsView(dict.fromkeys(o)) + self.assertEqual(pprint.pformat(items_view), exp) + + o = range(100) + exp = 'MappingView({%s})' % (': None,\n '.join(map(str, o)) + ': None') + mapping_view = MappingView(dict.fromkeys(o)) + self.assertEqual(pprint.pformat(mapping_view), exp) + + o = range(100) + exp = 'ValuesView({%s})' % (': None,\n '.join(map(str, o)) + ': None') + values_view = ValuesView(dict.fromkeys(o)) + self.assertEqual(pprint.pformat(values_view), exp) + o = range(100) exp = '[%s]' % ',\n '.join(map(str, o)) for type in [list, list2]: @@ -425,6 +503,30 @@ def test_ordered_dict(self): ('a', 6), ('lazy', 7), ('dog', 8)])""") + self.assertEqual(pprint.pformat(d.keys(), sort_dicts=False), +"""\ +odict_keys(['the', + 'quick', + 'brown', + 'fox', + 'jumped', + 'over', + 'a', + 'lazy', + 'dog'])""") + self.assertEqual(pprint.pformat(d.items(), sort_dicts=False), +"""\ +odict_items([('the', 0), + ('quick', 1), + ('brown', 2), + ('fox', 3), + ('jumped', 4), + ('over', 5), + ('a', 6), + ('lazy', 7), + ('dog', 8)])""") + self.assertEqual(pprint.pformat(d.values(), sort_dicts=False), + "odict_values([0, 1, 2, 3, 4, 5, 6, 7, 8])") def test_mapping_proxy(self): words = 'the quick brown fox jumped over a lazy dog'.split() @@ -453,6 +555,152 @@ def test_mapping_proxy(self): ('lazy', 7), ('dog', 8)]))""") + def test_dict_views(self): + for dict_class in (dict, collections.OrderedDict, collections.Counter): + empty = dict_class({}) + short = dict_class(dict(zip('edcba', 'edcba'))) + long = dict_class(dict((chr(x), chr(x)) for x in range(90, 64, -1))) + lengths = {"empty": empty, "short": short, "long": long} + prefix = "odict" if dict_class is collections.OrderedDict else "dict" + for name, d in lengths.items(): + with self.subTest(length=name, prefix=prefix): + is_short = len(d) < 6 + joiner = ", " if is_short else ",\n " + k = d.keys() + v = d.values() + i = d.items() + self.assertEqual(pprint.pformat(k, sort_dicts=True), + prefix + "_keys([%s])" % + joiner.join(repr(key) for key in sorted(k))) + self.assertEqual(pprint.pformat(v, sort_dicts=True), + prefix + "_values([%s])" % + joiner.join(repr(val) for val in sorted(v))) + self.assertEqual(pprint.pformat(i, sort_dicts=True), + prefix + "_items([%s])" % + joiner.join(repr(item) for item in sorted(i))) + self.assertEqual(pprint.pformat(k, sort_dicts=False), + prefix + "_keys([%s])" % + joiner.join(repr(key) for key in k)) + self.assertEqual(pprint.pformat(v, sort_dicts=False), + prefix + "_values([%s])" % + joiner.join(repr(val) for val in v)) + self.assertEqual(pprint.pformat(i, sort_dicts=False), + prefix + "_items([%s])" % + joiner.join(repr(item) for item in i)) + + def test_abc_views(self): + empty = {} + short = dict(zip('edcba', 'edcba')) + long = dict((chr(x), chr(x)) for x in range(90, 64, -1)) + lengths = {"empty": empty, "short": short, "long": long} + # Test that a subclass that doesn't replace __repr__ works with different lengths + class MV(MappingView): pass + + for name, d in lengths.items(): + with self.subTest(length=name, name="Views"): + is_short = len(d) < 6 + joiner = ", " if is_short else ",\n " + i = d.items() + s = sorted(i) + joined_items = "({%s})" % joiner.join(["%r: %r" % (k, v) for (k, v) in i]) + sorted_items = "({%s})" % joiner.join(["%r: %r" % (k, v) for (k, v) in s]) + self.assertEqual(pprint.pformat(KeysView(d), sort_dicts=True), + KeysView.__name__ + sorted_items) + self.assertEqual(pprint.pformat(ItemsView(d), sort_dicts=True), + ItemsView.__name__ + sorted_items) + self.assertEqual(pprint.pformat(MappingView(d), sort_dicts=True), + MappingView.__name__ + sorted_items) + self.assertEqual(pprint.pformat(MV(d), sort_dicts=True), + MV.__name__ + sorted_items) + self.assertEqual(pprint.pformat(ValuesView(d), sort_dicts=True), + ValuesView.__name__ + sorted_items) + self.assertEqual(pprint.pformat(KeysView(d), sort_dicts=False), + KeysView.__name__ + joined_items) + self.assertEqual(pprint.pformat(ItemsView(d), sort_dicts=False), + ItemsView.__name__ + joined_items) + self.assertEqual(pprint.pformat(MappingView(d), sort_dicts=False), + MappingView.__name__ + joined_items) + self.assertEqual(pprint.pformat(MV(d), sort_dicts=False), + MV.__name__ + joined_items) + self.assertEqual(pprint.pformat(ValuesView(d), sort_dicts=False), + ValuesView.__name__ + joined_items) + + def test_nested_views(self): + d = {1: MappingView({1: MappingView({1: MappingView({1: 2})})})} + self.assertEqual(repr(d), + "{1: MappingView({1: MappingView({1: MappingView({1: 2})})})}") + self.assertEqual(pprint.pformat(d), + "{1: MappingView({1: MappingView({1: MappingView({1: 2})})})}") + self.assertEqual(pprint.pformat(d, depth=2), + "{1: MappingView({1: {...}})}") + d = {} + d1 = {1: d.values()} + d2 = {1: d1.values()} + d3 = {1: d2.values()} + self.assertEqual(pprint.pformat(d3), + "{1: dict_values([dict_values([dict_values([])])])}") + self.assertEqual(pprint.pformat(d3, depth=2), + "{1: dict_values([{...}])}") + + def test_unorderable_items_views(self): + """Check that views with unorderable items have stable sorting.""" + d = dict((((3+1j), 3), ((1+1j), (1+0j)), (1j, 0j), (500, None), (499, None))) + iv = ItemsView(d) + self.assertEqual(pprint.pformat(iv), + pprint.pformat(iv)) + self.assertTrue(pprint.pformat(iv).endswith(", 499: None, 500: None})"), + pprint.pformat(iv)) + self.assertEqual(pprint.pformat(d.items()), # Won't be equal unless _safe_tuple + pprint.pformat(d.items())) # is used in _safe_repr + self.assertTrue(pprint.pformat(d.items()).endswith(", (499, None), (500, None)])")) + + def test_mapping_view_subclass_no_mapping(self): + class BMV(MappingView): + def __init__(self, d): + super().__init__(d) + self.mapping = self._mapping + del self._mapping + + self.assertRaises(AttributeError, pprint.pformat, BMV({})) + + def test_mapping_subclass_repr(self): + """Test that mapping ABC views use their ._mapping's __repr__.""" + class MyMapping(Mapping): + def __init__(self, keys=None): + self._keys = {} if keys is None else dict.fromkeys(keys) + + def __getitem__(self, item): + return self._keys[item] + + def __len__(self): + return len(self._keys) + + def __iter__(self): + return iter(self._keys) + + def __repr__(self): + return f"{self.__class__.__name__}([{', '.join(map(repr, self._keys.keys()))}])" + + m = MyMapping(["test", 1]) + self.assertEqual(repr(m), "MyMapping(['test', 1])") + short_view_repr = "%s(MyMapping(['test', 1]))" + self.assertEqual(repr(m.keys()), short_view_repr % "KeysView") + self.assertEqual(pprint.pformat(m.items()), short_view_repr % "ItemsView") + self.assertEqual(pprint.pformat(m.keys()), short_view_repr % "KeysView") + self.assertEqual(pprint.pformat(MappingView(m)), short_view_repr % "MappingView") + self.assertEqual(pprint.pformat(m.values()), short_view_repr % "ValuesView") + + alpha = "abcdefghijklmnopqrstuvwxyz" + m = MyMapping(alpha) + alpha_repr = ", ".join(map(repr, list(alpha))) + long_view_repr = "%%s(MyMapping([%s]))" % alpha_repr + self.assertEqual(repr(m), "MyMapping([%s])" % alpha_repr) + self.assertEqual(repr(m.keys()), long_view_repr % "KeysView") + self.assertEqual(pprint.pformat(m.items()), long_view_repr % "ItemsView") + self.assertEqual(pprint.pformat(m.keys()), long_view_repr % "KeysView") + self.assertEqual(pprint.pformat(MappingView(m)), long_view_repr % "MappingView") + self.assertEqual(pprint.pformat(m.values()), long_view_repr % "ValuesView") + def test_empty_simple_namespace(self): ns = types.SimpleNamespace() formatted = pprint.pformat(ns) @@ -768,6 +1016,10 @@ def test_sort_unorderable_values(self): 'frozenset({' + ','.join(map(repr, skeys)) + '})') self.assertEqual(clean(pprint.pformat(dict.fromkeys(keys))), '{' + ','.join('%r:None' % k for k in skeys) + '}') + self.assertEqual(clean(pprint.pformat(dict.fromkeys(keys).keys())), + 'dict_keys([' + ','.join('%r' % k for k in skeys) + '])') + self.assertEqual(clean(pprint.pformat(dict.fromkeys(keys).items())), + 'dict_items([' + ','.join('(%r,None)' % k for k in skeys) + '])') # Issue 10017: TypeError on user-defined types as dict keys. self.assertEqual(pprint.pformat({Unorderable: 0, 1: 0}), @@ -1049,6 +1301,66 @@ def test_chainmap(self): ('a', 6), ('lazy', 7), ('dog', 8)]))""") + self.assertEqual(pprint.pformat(d.keys()), +"""\ +KeysView(ChainMap({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0}, + OrderedDict([('the', 0), + ('quick', 1), + ('brown', 2), + ('fox', 3), + ('jumped', 4), + ('over', 5), + ('a', 6), + ('lazy', 7), + ('dog', 8)])))""") + self.assertEqual(pprint.pformat(d.items()), + """\ +ItemsView(ChainMap({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0}, + OrderedDict([('the', 0), + ('quick', 1), + ('brown', 2), + ('fox', 3), + ('jumped', 4), + ('over', 5), + ('a', 6), + ('lazy', 7), + ('dog', 8)])))""") + self.assertEqual(pprint.pformat(d.values()), + """\ +ValuesView(ChainMap({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0}, + OrderedDict([('the', 0), + ('quick', 1), + ('brown', 2), + ('fox', 3), + ('jumped', 4), + ('over', 5), + ('a', 6), + ('lazy', 7), + ('dog', 8)])))""") def test_deque(self): d = collections.deque() @@ -1096,6 +1408,36 @@ def test_user_dict(self): 'over': 5, 'quick': 1, 'the': 0}""") + self.assertEqual(pprint.pformat(d.keys()), """\ +KeysView({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0})""") + self.assertEqual(pprint.pformat(d.items()), """\ +ItemsView({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0})""") + self.assertEqual(pprint.pformat(d.values()), """\ +ValuesView({'a': 6, + 'brown': 2, + 'dog': 8, + 'fox': 3, + 'jumped': 4, + 'lazy': 7, + 'over': 5, + 'quick': 1, + 'the': 0})""") def test_user_list(self): d = collections.UserList() diff --git a/Misc/NEWS.d/next/Library/2021-12-18-12-46-20.bpo-45959.vPlr3P.rst b/Misc/NEWS.d/next/Library/2021-12-18-12-46-20.bpo-45959.vPlr3P.rst new file mode 100644 index 00000000000000..bcafa64d2638dc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-12-18-12-46-20.bpo-45959.vPlr3P.rst @@ -0,0 +1 @@ +:mod:`pprint` can now pretty-print dict views. From c91ad5da9d92eac4718e4da8d53689c3cc24535e Mon Sep 17 00:00:00 2001 From: Chris Patti Date: Tue, 20 May 2025 15:47:57 -0400 Subject: [PATCH 248/989] gh-128066: Properly handle history file writes for RO fs on PyREPL (gh-134380) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/_pyrepl/simple_interact.py | 2 ++ Lib/site.py | 8 +++++++- .../2025-05-20-14-41-50.gh-issue-128066.qzzGfv.rst | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-14-41-50.gh-issue-128066.qzzGfv.rst diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index c23894a34b8b53..965b853c34b392 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,6 +31,7 @@ import sys import code import warnings +import errno from .readline import _get_reader, multiline_input, append_history_file @@ -153,6 +154,7 @@ def maybe_run_command(statement: str) -> bool: append_history_file() except (FileNotFoundError, PermissionError, OSError) as e: warnings.warn(f"failed to open the history file for writing: {e}") + input_n += 1 except KeyboardInterrupt: r = _get_reader() diff --git a/Lib/site.py b/Lib/site.py index 5c38b1b17d5abd..f93271971594d8 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -75,6 +75,7 @@ import _sitebuiltins import _io as io import stat +import errno # Prefixes for site-packages; add additional prefixes like /usr/local here PREFIXES = [sys.prefix, sys.exec_prefix] @@ -578,10 +579,15 @@ def register_readline(): def write_history(): try: readline_module.write_history_file(history) - except (FileNotFoundError, PermissionError): + except FileNotFoundError, PermissionError: # home directory does not exist or is not writable # https://bugs.python.org/issue19891 pass + except OSError: + if errno.EROFS: + pass # gh-128066: read-only file system + else: + raise atexit.register(write_history) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-14-41-50.gh-issue-128066.qzzGfv.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-14-41-50.gh-issue-128066.qzzGfv.rst new file mode 100644 index 00000000000000..f78190276851b4 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-14-41-50.gh-issue-128066.qzzGfv.rst @@ -0,0 +1,3 @@ +Fixes an edge case where PyREPL improperly threw an error when Python is +invoked on a read only filesystem while trying to write history file +entries. From a3a3cf6d157948ed64ae837d6310b933a39a2493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Hern=C3=A1ndez?= Date: Tue, 20 May 2025 16:26:48 -0400 Subject: [PATCH 249/989] gh-134215: PyREPL: Do not show underscored modules by default during autocompletion (gh-134267) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Tomas R. Co-authored-by: Łukasz Langa --- Lib/_pyrepl/_module_completer.py | 15 +++++-- Lib/test/test_pyrepl/test_pyrepl.py | 45 ++++++++++++++++++- Misc/ACKS | 1 + ...-05-19-14-57-46.gh-issue-134215.sbdDK6.rst | 1 + 4 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-05-19-14-57-46.gh-issue-134215.sbdDK6.rst diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 0606797226d1e0..9aafb55090e2ce 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -81,8 +81,10 @@ def find_modules(self, path: str, prefix: str) -> list[str]: def _find_modules(self, path: str, prefix: str) -> list[str]: if not path: # Top-level import (e.g. `import foo`` or `from foo`)` - builtin_modules = [name for name in sys.builtin_module_names if name.startswith(prefix)] - third_party_modules = [name for _, name, _ in self.global_cache if name.startswith(prefix)] + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] return sorted(builtin_modules + third_party_modules) if path.startswith('.'): @@ -98,7 +100,14 @@ def _find_modules(self, path: str, prefix: str) -> list[str]: if mod_info.ispkg and mod_info.name == segment] modules = self.iter_submodules(modules) return [module.name for module in modules - if module.name.startswith(prefix)] + if self.is_suggestion_match(module.name, prefix)] + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: """Iterate over all submodules of the given parent modules.""" diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 59f5d1f893f9fd..29762232d43b89 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -8,6 +8,7 @@ import subprocess import sys import tempfile +from pkgutil import ModuleInfo from unittest import TestCase, skipUnless, skipIf from unittest.mock import patch from test.support import force_not_colorized, make_clean_env, Py_DEBUG @@ -959,6 +960,46 @@ def test_import_completions(self): output = reader.readline() self.assertEqual(output, expected) + @patch("pkgutil.iter_modules", lambda: [ModuleInfo(None, "public", True), + ModuleInfo(None, "_private", True)]) + @patch("sys.builtin_module_names", ()) + def test_private_completions(self): + cases = ( + # Return public methods by default + ("import \t\n", "import public"), + ("from \t\n", "from public"), + # Return private methods if explicitly specified + ("import _\t\n", "import _private"), + ("from _\t\n", "from _private"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + + @patch( + "_pyrepl._module_completer.ModuleCompleter.iter_submodules", + lambda *_: [ + ModuleInfo(None, "public", True), + ModuleInfo(None, "_private", True), + ], + ) + def test_sub_module_private_completions(self): + cases = ( + # Return public methods by default + ("from foo import \t\n", "from foo import public"), + # Return private methods if explicitly specified + ("from foo import _\t\n", "from foo import _private"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + def test_builtin_completion_top_level(self): import importlib # Make iter_modules() search only the standard library. @@ -991,8 +1032,8 @@ def test_relative_import_completions(self): output = reader.readline() self.assertEqual(output, expected) - @patch("pkgutil.iter_modules", lambda: [(None, 'valid_name', None), - (None, 'invalid-name', None)]) + @patch("pkgutil.iter_modules", lambda: [ModuleInfo(None, "valid_name", True), + ModuleInfo(None, "invalid-name", True)]) def test_invalid_identifiers(self): # Make sure modules which are not valid identifiers # are not suggested as those cannot be imported via 'import'. diff --git a/Misc/ACKS b/Misc/ACKS index 1b500870dec472..571142e7e49763 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -763,6 +763,7 @@ Chris Herborth Ivan Herman Jürgen Hermann Joshua Jay Herman +Kevin Hernandez Gary Herron Ernie Hershey Thomas Herve diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-05-19-14-57-46.gh-issue-134215.sbdDK6.rst b/Misc/NEWS.d/next/Tools-Demos/2025-05-19-14-57-46.gh-issue-134215.sbdDK6.rst new file mode 100644 index 00000000000000..546ed2a56b6695 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-05-19-14-57-46.gh-issue-134215.sbdDK6.rst @@ -0,0 +1 @@ +:term:`REPL` import autocomplete only suggests private modules when explicitly specified. From aadda87b3d3d99cb9e8c8791bb9715a3f0209195 Mon Sep 17 00:00:00 2001 From: tigerding <43339228+zydtiger@users.noreply.github.com> Date: Tue, 20 May 2025 16:36:04 -0400 Subject: [PATCH 250/989] gh-134209: use heap-allocated memory in `_curses.window.{instr,getstr}` (GH-134283) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * made curses buffer heap allocated instead of stack * change docs to explicitly mention the max buffer size * changing GetStr() function to behave similarly too * Update Doc/library/curses.rst * Update instr with proper return error handling * Update Modules/_cursesmodule.c * change to strlen and better memory safety * change from const int to Py_ssize_t * add mem allocation guard * update versionchanged to mention it was an increase. * explicitly use versionchanged 3.14 as that is its own branch now. TESTED: `python -m test -u curses test_curses` --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Gregory P. Smith Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/curses.rst | 8 ++ ...-05-19-20-59-06.gh-issue-134209.anhTcF.rst | 3 + Modules/_cursesmodule.c | 102 ++++++++++++------ 3 files changed, 80 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-20-59-06.gh-issue-134209.anhTcF.rst diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 137504c51b4358..5ec23b61396773 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -988,6 +988,10 @@ the following methods and attributes: window.getstr(y, x, n) Read a bytes object from the user, with primitive line editing capacity. + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.getyx() @@ -1079,6 +1083,10 @@ the following methods and attributes: current cursor position, or at *y*, *x* if specified. Attributes are stripped from the characters. If *n* is specified, :meth:`instr` returns a string at most *n* characters long (exclusive of the trailing NUL). + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.is_linetouched(line) diff --git a/Misc/NEWS.d/next/Library/2025-05-19-20-59-06.gh-issue-134209.anhTcF.rst b/Misc/NEWS.d/next/Library/2025-05-19-20-59-06.gh-issue-134209.anhTcF.rst new file mode 100644 index 00000000000000..f985872f3c975e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-20-59-06.gh-issue-134209.anhTcF.rst @@ -0,0 +1,3 @@ +:mod:`curses`: The :meth:`curses.window.instr` and :meth:`curses.window.getstr` +methods now allocate their internal buffer on the heap instead of the stack; +in addition, the max buffer size is increased from 1023 to 2047. diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 2e6ec822e2d5b6..5e1eccee3e4a89 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1816,7 +1816,7 @@ _curses.window.getstr x: int X-coordinate. ] - n: int = 1023 + n: int = 2047 Maximal number of characters. / @@ -1829,62 +1829,80 @@ PyCursesWindow_GetStr(PyObject *op, PyObject *args) PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + int rtn; + + /* could make the buffer size larger/dynamic */ + Py_ssize_t max_buf_size = 2048; + PyObject *result = PyBytes_FromStringAndSize(NULL, max_buf_size); + if (result == NULL) + return NULL; + char *buf = PyBytes_AS_STRING(result); switch (PyTuple_Size(args)) { case 0: Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win,rtn, 1023); + rtn = wgetnstr(self->win, buf, max_buf_size - 1); Py_END_ALLOW_THREADS break; case 1: if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; + goto error; if (n < 0) { PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; + goto error; } Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win, rtn, Py_MIN(n, 1023)); + rtn = wgetnstr(self->win, buf, Py_MIN(n, max_buf_size - 1)); Py_END_ALLOW_THREADS break; case 2: if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; + goto error; Py_BEGIN_ALLOW_THREADS #ifdef STRICT_SYSV_CURSES - rtn2 = wmove(self->win,y,x)==ERR ? ERR : wgetnstr(self->win, rtn, 1023); + rtn = wmove(self->win,y,x)==ERR ? ERR : wgetnstr(self->win, rtn, max_buf_size - 1); #else - rtn2 = mvwgetnstr(self->win,y,x,rtn, 1023); + rtn = mvwgetnstr(self->win,y,x,buf, max_buf_size - 1); #endif Py_END_ALLOW_THREADS break; case 3: if (!PyArg_ParseTuple(args,"iii;y,x,n", &y, &x, &n)) - return NULL; + goto error; if (n < 0) { PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; + goto error; } #ifdef STRICT_SYSV_CURSES Py_BEGIN_ALLOW_THREADS - rtn2 = wmove(self->win,y,x)==ERR ? ERR : - wgetnstr(self->win, rtn, Py_MIN(n, 1023)); + rtn = wmove(self->win,y,x)==ERR ? ERR : + wgetnstr(self->win, rtn, Py_MIN(n, max_buf_size - 1)); Py_END_ALLOW_THREADS #else Py_BEGIN_ALLOW_THREADS - rtn2 = mvwgetnstr(self->win, y, x, rtn, Py_MIN(n, 1023)); + rtn = mvwgetnstr(self->win, y, x, buf, Py_MIN(n, max_buf_size - 1)); Py_END_ALLOW_THREADS #endif break; default: PyErr_SetString(PyExc_TypeError, "getstr requires 0 to 3 arguments"); + goto error; + } + + if (rtn == ERR) { + Py_DECREF(result); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + + if (_PyBytes_Resize(&result, strlen(buf)) < 0) { return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + + return result; + +error: + Py_DECREF(result); + return NULL; } /*[clinic input] @@ -2023,7 +2041,7 @@ _curses.window.instr x: int X-coordinate. ] - n: int = 1023 + n: int = 2047 Maximal number of characters. / @@ -2040,43 +2058,61 @@ PyCursesWindow_InStr(PyObject *op, PyObject *args) PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + int rtn; + + /* could make the buffer size larger/dynamic */ + Py_ssize_t max_buf_size = 2048; + PyObject *result = PyBytes_FromStringAndSize(NULL, max_buf_size); + if (result == NULL) + return NULL; + char *buf = PyBytes_AS_STRING(result); switch (PyTuple_Size(args)) { case 0: - rtn2 = winnstr(self->win,rtn, 1023); + rtn = winnstr(self->win, buf, max_buf_size - 1); break; case 1: if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; + goto error; if (n < 0) { PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; + goto error; } - rtn2 = winnstr(self->win, rtn, Py_MIN(n, 1023)); + rtn = winnstr(self->win, buf, Py_MIN(n, max_buf_size - 1)); break; case 2: if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; - rtn2 = mvwinnstr(self->win,y,x,rtn,1023); + goto error; + rtn = mvwinnstr(self->win, y, x, buf, max_buf_size - 1); break; case 3: if (!PyArg_ParseTuple(args, "iii;y,x,n", &y, &x, &n)) - return NULL; + goto error; if (n < 0) { PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; + goto error; } - rtn2 = mvwinnstr(self->win, y, x, rtn, Py_MIN(n,1023)); + rtn = mvwinnstr(self->win, y, x, buf, Py_MIN(n, max_buf_size - 1)); break; default: PyErr_SetString(PyExc_TypeError, "instr requires 0 or 3 arguments"); + goto error; + } + + if (rtn == ERR) { + Py_DECREF(result); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + + if (_PyBytes_Resize(&result, strlen(buf)) < 0) { return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + + return result; + +error: + Py_DECREF(result); + return NULL; } /*[clinic input] From e4fbfb12889013fd52565cd2598a366754cb677b Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Tue, 20 May 2025 23:21:25 +0200 Subject: [PATCH 251/989] GH-130727: Avoid race condition in _wmimodule by copying shared data (GH-134313) --- ...-05-20-21-43-20.gh-issue-130727.-69t4D.rst | 2 ++ PC/_wmimodule.cpp | 22 +++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2025-05-20-21-43-20.gh-issue-130727.-69t4D.rst diff --git a/Misc/NEWS.d/next/Windows/2025-05-20-21-43-20.gh-issue-130727.-69t4D.rst b/Misc/NEWS.d/next/Windows/2025-05-20-21-43-20.gh-issue-130727.-69t4D.rst new file mode 100644 index 00000000000000..dc10b3e62c8d4a --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2025-05-20-21-43-20.gh-issue-130727.-69t4D.rst @@ -0,0 +1,2 @@ +Fix a race in internal calls into WMI that can result in an "invalid handle" +exception under high load. Patch by Chris Eibl. diff --git a/PC/_wmimodule.cpp b/PC/_wmimodule.cpp index b6efb3e4a207b4..30d61c86587fbe 100644 --- a/PC/_wmimodule.cpp +++ b/PC/_wmimodule.cpp @@ -57,11 +57,11 @@ _query_thread(LPVOID param) IEnumWbemClassObject* enumerator = NULL; HRESULT hr = S_OK; BSTR bstrQuery = NULL; - struct _query_data *data = (struct _query_data*)param; + _query_data data = *(struct _query_data*)param; // gh-125315: Copy the query string first, so that if the main thread gives // up on waiting we aren't left with a dangling pointer (and a likely crash) - bstrQuery = SysAllocString(data->query); + bstrQuery = SysAllocString(data.query); if (!bstrQuery) { hr = HRESULT_FROM_WIN32(ERROR_NOT_ENOUGH_MEMORY); } @@ -71,7 +71,7 @@ _query_thread(LPVOID param) } if (FAILED(hr)) { - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); if (bstrQuery) { SysFreeString(bstrQuery); } @@ -96,7 +96,7 @@ _query_thread(LPVOID param) IID_IWbemLocator, (LPVOID *)&locator ); } - if (SUCCEEDED(hr) && !SetEvent(data->initEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.initEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -105,7 +105,7 @@ _query_thread(LPVOID param) NULL, NULL, 0, NULL, 0, 0, &services ); } - if (SUCCEEDED(hr) && !SetEvent(data->connectEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.connectEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -143,7 +143,7 @@ _query_thread(LPVOID param) if (FAILED(hr) || got != 1 || !value) { continue; } - if (!startOfEnum && !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL)) { + if (!startOfEnum && !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL)) { hr = HRESULT_FROM_WIN32(GetLastError()); break; } @@ -171,10 +171,10 @@ _query_thread(LPVOID param) DWORD cbStr1, cbStr2; cbStr1 = (DWORD)(wcslen(propName) * sizeof(propName[0])); cbStr2 = (DWORD)(wcslen(propStr) * sizeof(propStr[0])); - if (!WriteFile(data->writePipe, propName, cbStr1, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"=", 2, &written, NULL) || - !WriteFile(data->writePipe, propStr, cbStr2, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL) + if (!WriteFile(data.writePipe, propName, cbStr1, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"=", 2, &written, NULL) || + !WriteFile(data.writePipe, propStr, cbStr2, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL) ) { hr = HRESULT_FROM_WIN32(GetLastError()); } @@ -200,7 +200,7 @@ _query_thread(LPVOID param) locator->Release(); } CoUninitialize(); - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); return (DWORD)hr; } From e1c0c451a2ff815fc817e71ec15c37bff9cb84d0 Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Wed, 21 May 2025 06:02:50 +0800 Subject: [PATCH 252/989] GH-131798: Narrow the return type of _GET_LEN to int (GH-133345) --- Lib/test/test_capi/test_opt.py | 44 +++++++++++++++++++ ...-05-03-22-31-53.gh-issue-131798.fQ0ato.rst | 2 + Python/optimizer_bytecodes.c | 19 ++++++++ Python/optimizer_cases.c.h | 23 +++++++++- 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-03-22-31-53.gh-issue-131798.fQ0ato.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f7a4200e1eec0e..50c4f19a1ab55f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1925,6 +1925,50 @@ def testfunc(n): self.assertNotIn("_GUARD_NOS_INT", uops) self.assertNotIn("_GUARD_TOS_INT", uops) + def test_get_len_with_const_tuple(self): + def testfunc(n): + x = 0.0 + for _ in range(n): + match (1, 2, 3, 4): + case [_, _, _, _]: + x += 1.0 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(int(res), TIER2_THRESHOLD) + uops = get_opnames(ex) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertNotIn("_GET_LEN", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) + + def test_get_len_with_non_const_tuple(self): + def testfunc(n): + x = 0.0 + for _ in range(n): + match object(), object(): + case [_, _]: + x += 1.0 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(int(res), TIER2_THRESHOLD) + uops = get_opnames(ex) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertNotIn("_GET_LEN", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) + + def test_get_len_with_non_tuple(self): + def testfunc(n): + x = 0.0 + for _ in range(n): + match [1, 2, 3, 4]: + case [_, _, _, _]: + x += 1.0 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(int(res), TIER2_THRESHOLD) + uops = get_opnames(ex) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertIn("_GET_LEN", uops) + def test_binary_op_subscr_tuple_int(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-03-22-31-53.gh-issue-131798.fQ0ato.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-03-22-31-53.gh-issue-131798.fQ0ato.rst new file mode 100644 index 00000000000000..f322d43b30a9f3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-03-22-31-53.gh-issue-131798.fQ0ato.rst @@ -0,0 +1,2 @@ +Allow the JIT to remove int guards after ``_GET_LEN`` by setting the return +type to int. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3799f25880d34b..639b4b7af1632c 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1116,6 +1116,25 @@ dummy_func(void) { res = sym_new_type(ctx, &PyLong_Type); } + op(_GET_LEN, (obj -- obj, len)) { + int tuple_length = sym_tuple_length(obj); + if (tuple_length == -1) { + len = sym_new_type(ctx, &PyLong_Type); + } + else { + assert(tuple_length >= 0); + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)temp); + } + len = sym_new_const(ctx, temp); + Py_DECREF(temp); + } + } + op(_GUARD_CALLABLE_LEN, (callable, unused, unused -- callable, unused, unused)) { PyObject *len = _PyInterpreterState_GET()->callable_cache.len; if (sym_get_const(ctx, callable) == len) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a2c68457d8ef4e..0a539b2829e3ba 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1463,8 +1463,29 @@ } case _GET_LEN: { + JitOptSymbol *obj; JitOptSymbol *len; - len = sym_new_not_null(ctx); + obj = stack_pointer[-1]; + int tuple_length = sym_tuple_length(obj); + if (tuple_length == -1) { + len = sym_new_type(ctx, &PyLong_Type); + } + else { + assert(tuple_length >= 0); + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)temp); + } + len = sym_new_const(ctx, temp); + stack_pointer[0] = len; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + Py_DECREF(temp); + stack_pointer += -1; + } stack_pointer[0] = len; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); From 2f0570caf490d4edbfbfd75c529cdee24f6edb8a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 20 May 2025 18:09:51 -0400 Subject: [PATCH 253/989] GH-131798: Narrow types more aggressively in the JIT (GH-134373) --- Include/internal/pycore_uop_ids.h | 75 ++++++++++--------- Include/internal/pycore_uop_metadata.h | 4 + Lib/test/test_capi/test_opt.py | 19 +++++ ...-05-20-13-58-18.gh-issue-131798.hG8xBw.rst | 1 + Python/bytecodes.c | 5 ++ Python/executor_cases.c.h | 18 +++++ Python/optimizer_analysis.c | 74 ++++++++---------- Python/optimizer_bytecodes.c | 12 +++ Python/optimizer_cases.c.h | 18 +++++ Python/optimizer_symbols.c | 40 +++++++--- 10 files changed, 179 insertions(+), 87 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 6014e1bf3c0c38..4c270211d4c323 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -270,67 +270,68 @@ extern "C" { #define _POP_TOP POP_TOP #define _POP_TOP_LOAD_CONST_INLINE 486 #define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 488 +#define _POP_TWO 488 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 489 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 489 +#define _PUSH_FRAME 490 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 490 -#define _PY_FRAME_GENERAL 491 -#define _PY_FRAME_KW 492 -#define _QUICKEN_RESUME 493 -#define _REPLACE_WITH_TRUE 494 +#define _PUSH_NULL_CONDITIONAL 491 +#define _PY_FRAME_GENERAL 492 +#define _PY_FRAME_KW 493 +#define _QUICKEN_RESUME 494 +#define _REPLACE_WITH_TRUE 495 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 495 -#define _SEND 496 -#define _SEND_GEN_FRAME 497 +#define _SAVE_RETURN_OFFSET 496 +#define _SEND 497 +#define _SEND_GEN_FRAME 498 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 498 -#define _STORE_ATTR 499 -#define _STORE_ATTR_INSTANCE_VALUE 500 -#define _STORE_ATTR_SLOT 501 -#define _STORE_ATTR_WITH_HINT 502 +#define _START_EXECUTOR 499 +#define _STORE_ATTR 500 +#define _STORE_ATTR_INSTANCE_VALUE 501 +#define _STORE_ATTR_SLOT 502 +#define _STORE_ATTR_WITH_HINT 503 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 503 -#define _STORE_FAST_0 504 -#define _STORE_FAST_1 505 -#define _STORE_FAST_2 506 -#define _STORE_FAST_3 507 -#define _STORE_FAST_4 508 -#define _STORE_FAST_5 509 -#define _STORE_FAST_6 510 -#define _STORE_FAST_7 511 +#define _STORE_FAST 504 +#define _STORE_FAST_0 505 +#define _STORE_FAST_1 506 +#define _STORE_FAST_2 507 +#define _STORE_FAST_3 508 +#define _STORE_FAST_4 509 +#define _STORE_FAST_5 510 +#define _STORE_FAST_6 511 +#define _STORE_FAST_7 512 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 512 -#define _STORE_SUBSCR 513 -#define _STORE_SUBSCR_DICT 514 -#define _STORE_SUBSCR_LIST_INT 515 +#define _STORE_SLICE 513 +#define _STORE_SUBSCR 514 +#define _STORE_SUBSCR_DICT 515 +#define _STORE_SUBSCR_LIST_INT 516 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 516 -#define _TO_BOOL 517 +#define _TIER2_RESUME_CHECK 517 +#define _TO_BOOL 518 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 518 +#define _TO_BOOL_LIST 519 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 519 +#define _TO_BOOL_STR 520 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 520 -#define _UNPACK_SEQUENCE_LIST 521 -#define _UNPACK_SEQUENCE_TUPLE 522 -#define _UNPACK_SEQUENCE_TWO_TUPLE 523 +#define _UNPACK_SEQUENCE 521 +#define _UNPACK_SEQUENCE_LIST 522 +#define _UNPACK_SEQUENCE_TUPLE 523 +#define _UNPACK_SEQUENCE_TWO_TUPLE 524 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 523 +#define MAX_UOP_ID 524 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 002591318b3276..5f8c4f3210abff 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -63,6 +63,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, [_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -557,6 +558,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", @@ -708,6 +710,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TWO: + return 2; case _PUSH_NULL: return 0; case _END_FOR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 50c4f19a1ab55f..98b434313e4d2d 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2137,6 +2137,25 @@ def testfunc(n): self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertIn("_GUARD_IS_TRUE_POP", uops) + def test_set_type_version_sets_type(self): + class C: + A = 1 + + def testfunc(n): + x = 0 + c = C() + for _ in range(n): + x += c.A # Guarded. + x += type(c).A # Unguarded! + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 2 * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_GUARD_TYPE_VERSION", uops) + self.assertNotIn("_CHECK_ATTR_CLASS", uops) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst new file mode 100644 index 00000000000000..c490ecf15605b2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst @@ -0,0 +1 @@ +Improve the JIT's ability to narrow unknown classes to constant values. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 65f411fa10506e..a2367026cde8b8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,11 @@ dummy_func( PyStackRef_CLOSE(value); } + tier2 op(_POP_TWO, (nos, tos --)) { + PyStackRef_CLOSE(tos); + PyStackRef_CLOSE(nos); + } + pure inst(PUSH_NULL, (-- res)) { res = PyStackRef_NULL; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc521bd7923991..1c8239f38eec91 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,24 @@ break; } + case _POP_TWO: { + _PyStackRef tos; + _PyStackRef nos; + tos = stack_pointer[-1]; + nos = stack_pointer[-2]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(tos); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(nos); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _PUSH_NULL: { _PyStackRef res; res = PyStackRef_NULL; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b0bd1e9518c6e..53ab289b75cc9a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -523,6 +523,25 @@ optimize_uops( } +const uint16_t op_without_push[MAX_UOP_ID + 1] = { + [_COPY] = _NOP, + [_LOAD_CONST_INLINE] = _NOP, + [_LOAD_CONST_INLINE_BORROW] = _NOP, + [_LOAD_FAST] = _NOP, + [_LOAD_FAST_BORROW] = _NOP, + [_LOAD_SMALL_INT] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, +}; + +const uint16_t op_without_pop[MAX_UOP_ID + 1] = { + [_POP_TOP] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, +}; + static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) @@ -551,50 +570,23 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _NOP; } break; - case _POP_TOP: - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - optimize_pop_top_again: + default: { - _PyUOpInstruction *last = &buffer[pc-1]; - while (last->opcode == _NOP) { - last--; - } - switch (last->opcode) { - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - last->opcode = _POP_TOP; + // Cancel out pushes and pops, repeatedly. So: + // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP + // ...becomes: + // _NOP + _POP_TOP + _NOP + while (op_without_pop[opcode]) { + _PyUOpInstruction *last = &buffer[pc - 1]; + while (last->opcode == _NOP) { + last--; + } + if (!op_without_push[last->opcode]) { break; - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - last->opcode = _NOP; - goto optimize_pop_top_again; - case _COPY: - case _LOAD_CONST_INLINE: - case _LOAD_CONST_INLINE_BORROW: - case _LOAD_FAST: - case _LOAD_FAST_BORROW: - case _LOAD_SMALL_INT: - last->opcode = _NOP; - if (opcode == _POP_TOP) { - opcode = buffer[pc].opcode = _NOP; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; - } - else { - assert(opcode == _POP_TWO_LOAD_CONST_INLINE_BORROW); - opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; - goto optimize_pop_top_again; - } + } + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; } - _Py_FALLTHROUGH; - } - default: - { /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ bool needs_ip = opcode == _PUSH_FRAME; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 639b4b7af1632c..f12cd7b968cffc 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -118,6 +118,18 @@ dummy_func(void) { sym_set_type(left, &PyLong_Type); } + op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) { + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } + } + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { assert(type_version); if (sym_matches_type_version(owner, type_version)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0a539b2829e3ba..602f5e2cfaf77e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -102,6 +102,12 @@ break; } + case _POP_TWO: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _PUSH_NULL: { JitOptSymbol *res; res = sym_new_null(ctx); @@ -1259,6 +1265,18 @@ } case _CHECK_ATTR_CLASS: { + JitOptSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand0; + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e8a4f87031b76a..2e619fa6f9977f 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -200,6 +200,10 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version) { + PyTypeObject *type = _PyType_LookupByVersion(version); + if (type) { + _Py_uop_sym_set_type(ctx, sym, type); + } JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -215,18 +219,24 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int return true; } case JIT_SYM_KNOWN_VALUE_TAG: - Py_CLEAR(sym->value.value); - sym_set_bottom(ctx, sym); - return false; + if (Py_TYPE(sym->value.value)->tp_version_tag != version) { + Py_CLEAR(sym->value.value); + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TUPLE_TAG: - sym_set_bottom(ctx, sym); - return false; + if (PyTuple_Type.tp_version_tag != version) { + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TYPE_VERSION_TAG: - if (sym->version.version == version) { - return true; + if (sym->version.version != version) { + sym_set_bottom(ctx, sym); + return false; } - sym_set_bottom(ctx, sym); - return false; + return true; case JIT_SYM_BOTTOM_TAG: return false; case JIT_SYM_NON_NULL_TAG: @@ -266,6 +276,18 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val } return; case JIT_SYM_TUPLE_TAG: + if (PyTuple_CheckExact(const_val)) { + Py_ssize_t len = _Py_uop_sym_tuple_length(sym); + if (len == PyTuple_GET_SIZE(const_val)) { + for (Py_ssize_t i = 0; i < len; i++) { + JitOptSymbol *sym_item = _Py_uop_sym_tuple_getitem(ctx, sym, i); + PyObject *item = PyTuple_GET_ITEM(const_val, i); + _Py_uop_sym_set_const(ctx, sym_item, item); + } + make_const(sym, const_val); + return; + } + } sym_set_bottom(ctx, sym); return; case JIT_SYM_TYPE_VERSION_TAG: From 6856a04d68bc1b779b5756a8a7c0ef08d684a630 Mon Sep 17 00:00:00 2001 From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com> Date: Tue, 20 May 2025 19:50:49 -0400 Subject: [PATCH 254/989] Add documentation for remote debugging with pdb (#134260) * Mention remote debugging via -p PID in usage text Adds a brief note to the pdb help summary about attaching to a running process using the -p option, making the remote debugging feature more visible. * Mention remote debugging in pdb.rst --- Doc/library/pdb.rst | 20 +++++++++++++++++++- Lib/pdb.py | 3 ++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index a0304edddf6478..f4b51664545be5 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -80,7 +80,7 @@ The debugger's prompt is ``(Pdb)``, which is the indicator that you are in debug You can also invoke :mod:`pdb` from the command line to debug other scripts. For example:: - python -m pdb [-c command] (-m module | pyfile) [args ...] + python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post-mortem debugging (or @@ -104,6 +104,24 @@ useful than quitting the debugger upon program's exit. .. versionchanged:: 3.7 Added the ``-m`` option. +.. option:: -p, --pid + + Attach to the process with the specified PID. + + .. versionadded:: 3.14 + + +To attach to a running Python process for remote debugging, use the ``-p`` or +``--pid`` option with the target process's PID:: + + python -m pdb -p 1234 + +.. note:: + + Attaching to a process that is blocked in a system call or waiting for I/O + will only work once the next bytecode instruction is executed or when the + process receives a signal. + Typical usage to execute a statement under control of the debugger is:: >>> import pdb diff --git a/Lib/pdb.py b/Lib/pdb.py index 544c701bbd2c72..78ee35f61bbcea 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -3489,7 +3489,8 @@ def help(): _usage = """\ Debug the Python program given by pyfile. Alternatively, an executable module or package to debug can be specified using -the -m switch. +the -m switch. You can also attach to a running Python process +using the -p option with its PID. Initial commands are read from .pdbrc files in your home directory and in the current directory, if they exist. Commands supplied with From d327159eb4dd286973d10af93999de90a860880a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 20 May 2025 19:54:09 -0400 Subject: [PATCH 255/989] gh-91048: Fix error path result in _remote_debugging_module (#134347) --- Modules/_remote_debugging_module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 42db93bb5ead0f..8c0f40f835c36e 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1556,7 +1556,7 @@ get_stack_trace(PyObject* self, PyObject* args) &address_of_current_frame) < 0) { - Py_DECREF(result); + Py_CLEAR(result); goto result_err; } @@ -1565,7 +1565,7 @@ get_stack_trace(PyObject* self, PyObject* args) } if (PyList_Append(result, frame_info) == -1) { - Py_DECREF(result); + Py_CLEAR(result); goto result_err; } From 1298511b41ec0f9be925c12f3830e94fe8f7e7dc Mon Sep 17 00:00:00 2001 From: Tim Hatch Date: Tue, 20 May 2025 18:32:41 -0700 Subject: [PATCH 256/989] gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134250) bpo-28494: Improve zipfile.is_zipfile reliability The zipfile.is_zipfile function would only search for the EndOfZipfile section header. This failed to correctly identify non-zipfiles that contained this header. Now the zipfile.is_zipfile function verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify zipfile catalog * Added tests to validate failure of binary non-zipfiles * Reuse 'concat' handling for is_zipfile Co-authored-by: John Jolly --- Lib/test/test_zipfile/test_core.py | 19 ++++++++ Lib/zipfile/__init__.py | 48 +++++++++++++------ .../2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst | 1 + 3 files changed, 54 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 43056978848c03..e93603998f979e 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1991,6 +1991,25 @@ def test_is_zip_erroneous_file(self): self.assertFalse(zipfile.is_zipfile(fp)) fp.seek(0, 0) self.assertFalse(zipfile.is_zipfile(fp)) + # - passing non-zipfile with ZIP header elements + # data created using pyPNG like so: + # d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)] + # w = png.Writer(1,2,alpha=True,compression=0) + # f = open('onepix.png', 'wb') + # w.write(f, d) + # w.close() + data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00" + b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I" + b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07" + b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82") + # - passing a filename + with open(TESTFN, "wb") as fp: + fp.write(data) + self.assertFalse(zipfile.is_zipfile(TESTFN)) + # - passing a file-like object + fp = io.BytesIO() + fp.write(data) + self.assertFalse(zipfile.is_zipfile(fp)) def test_damaged_zipfile(self): """Check that zipfiles with missing bytes at the end raise BadZipFile.""" diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 894b4d37233923..18caeb3e04a2b5 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -234,8 +234,19 @@ def strip(cls, data, xids): def _check_zipfile(fp): try: - if _EndRecData(fp): - return True # file has correct magic number + endrec = _EndRecData(fp) + if endrec: + if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0: + return True # Empty zipfiles are still zipfiles + elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]: + # Central directory is on the same disk + fp.seek(sum(_handle_prepended_data(endrec))) + if endrec[_ECD_SIZE] >= sizeCentralDir: + data = fp.read(sizeCentralDir) # CD is where we expect it to be + if len(data) == sizeCentralDir: + centdir = struct.unpack(structCentralDir, data) # CD is the right size + if centdir[_CD_SIGNATURE] == stringCentralDir: + return True # First central directory entry has correct magic number except OSError: pass return False @@ -258,6 +269,22 @@ def is_zipfile(filename): pass return result +def _handle_prepended_data(endrec, debug=0): + size_cd = endrec[_ECD_SIZE] # bytes in central directory + offset_cd = endrec[_ECD_OFFSET] # offset of central directory + + # "concat" is zero, unless zip was concatenated to another file + concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + if endrec[_ECD_SIGNATURE] == stringEndArchive64: + # If Zip64 extension structures are present, account for them + concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + + if debug > 2: + inferred = concat + offset_cd + print("given, inferred, offset", offset_cd, inferred, concat) + + return offset_cd, concat + def _EndRecData64(fpin, offset, endrec): """ Read the ZIP64 end-of-archive records and use that to update endrec @@ -1501,28 +1528,21 @@ def _RealGetContents(self): raise BadZipFile("File is not a zip file") if self.debug > 1: print(endrec) - size_cd = endrec[_ECD_SIZE] # bytes in central directory - offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd - if endrec[_ECD_SIGNATURE] == stringEndArchive64: - # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + offset_cd, concat = _handle_prepended_data(endrec, self.debug) + + # self.start_dir: Position of start of central directory + self.start_dir = offset_cd + concat # store the offset to the beginning of data for the # .data_offset property self._data_offset = concat - if self.debug > 2: - inferred = concat + offset_cd - print("given, inferred, offset", offset_cd, inferred, concat) - # self.start_dir: Position of start of central directory - self.start_dir = offset_cd + concat if self.start_dir < 0: raise BadZipFile("Bad offset for central directory") fp.seek(self.start_dir, 0) + size_cd = endrec[_ECD_SIZE] data = fp.read(size_cd) fp = io.BytesIO(data) total = 0 diff --git a/Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst b/Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst new file mode 100644 index 00000000000000..0c518983770d5d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst @@ -0,0 +1 @@ +Improve Zip file validation false positive rate in :func:`zipfile.is_zipfile`. From c740fe3bd092911d9e474bcc0eed2a009482be9f Mon Sep 17 00:00:00 2001 From: Sofia Toro Date: Wed, 21 May 2025 00:49:06 -0400 Subject: [PATCH 257/989] gh-134360 Add processName attribute to `logging.Formatter` docstring (GH-134371) --- Lib/logging/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index f2d1a02629d92b..5c3c44249345f3 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -591,6 +591,7 @@ class Formatter(object): %(threadName)s Thread name (if available) %(taskName)s Task name (if available) %(process)d Process ID (if available) + %(processName)s Process name (if available) %(message)s The result of record.getMessage(), computed just as the record is emitted """ From e6dde10a69ce62c3b02b3b4e07559d06a9451fc2 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Wed, 21 May 2025 14:10:57 +0300 Subject: [PATCH 258/989] gh-132542: Only run test_native_id_after_fork if native_id is supported (GH-134408) --- Lib/test/test_threading.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index a9eec139bec8bf..0e51e7fc8c5a76 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1353,6 +1353,7 @@ def do_flush(*args, **kwargs): assert_python_ok("-c", script) @skip_unless_reliable_fork + @unittest.skipUnless(hasattr(threading, 'get_native_id'), "test needs threading.get_native_id()") def test_native_id_after_fork(self): script = """if True: import threading From dcfc91e4e552e74a43f5fdf049af7a8fe7a784ee Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 21 May 2025 15:56:34 +0300 Subject: [PATCH 259/989] Fix signature of `_curses.assume_default_colors` in the docs (#134409) --- Doc/library/curses.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 5ec23b61396773..0b13c559295f3c 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -68,7 +68,7 @@ The module :mod:`curses` defines the following exception: The module :mod:`curses` defines the following functions: -.. function:: assume_default_colors(fg, bg) +.. function:: assume_default_colors(fg, bg, /) Allow use of default values for colors on terminals supporting this feature. Use this to support transparency in your application. From b8998fe2d8249565bf30ce6075ed678e1643f2a4 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 21 May 2025 09:01:25 -0400 Subject: [PATCH 260/989] gh-131185: Use a proper thread-local for cached thread states (gh-132510) Switches over to a _Py_thread_local in place of autoTssKey, and also fixes a few other checks regarding PyGILState_Ensure after finalization. Note that this doesn't fix concurrent use of PyGILState_Ensure with Py_Finalize; I'm pretty sure zapthreads doesn't work at all, and that needs to be fixed seperately. --- Include/internal/pycore_runtime_init.h | 5 +- Include/internal/pycore_runtime_structs.h | 3 - Lib/test/test_embed.py | 4 + ...-04-14-07-41-28.gh-issue-131185.ZCjMHD.rst | 2 + Programs/_testembed.c | 29 ++- Python/pystate.c | 199 ++++-------------- Tools/c-analyzer/cpython/ignored.tsv | 1 + 7 files changed, 79 insertions(+), 164 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-04-14-07-41-28.gh-issue-131185.ZCjMHD.rst diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 4200d91a2fcd9d..b182f7825a2326 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -61,9 +61,6 @@ extern PyTypeObject _PyExc_MemoryError; }, \ }, \ }, \ - /* A TSS key must be initialized with Py_tss_NEEDS_INIT \ - in accordance with the specification. */ \ - .autoTSSkey = Py_tss_NEEDS_INIT, \ .parser = _parser_runtime_state_INIT, \ .ceval = { \ .pending_mainthread = { \ @@ -236,4 +233,4 @@ extern PyTypeObject _PyExc_MemoryError; #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ +#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ \ No newline at end of file diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 6bf3aae7175a97..12164c7fdd9425 100644 --- a/Include/internal/pycore_runtime_structs.h +++ b/Include/internal/pycore_runtime_structs.h @@ -223,9 +223,6 @@ struct pyruntimestate { struct _pythread_runtime_state threads; struct _signals_runtime_state signals; - /* Used for the thread state bound to the current thread. */ - Py_tss_t autoTSSkey; - /* Used instead of PyThreadState.trash when there is not current tstate. */ Py_tss_t trashTSSkey; diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 46222e521aead8..89f4aebe28f4a1 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1915,6 +1915,10 @@ def test_get_incomplete_frame(self): self.run_embedded_interpreter("test_get_incomplete_frame") + def test_gilstate_after_finalization(self): + self.run_embedded_interpreter("test_gilstate_after_finalization") + + class MiscTests(EmbeddingTestsMixin, unittest.TestCase): def test_unicode_id_init(self): # bpo-42882: Test that _PyUnicode_FromId() works diff --git a/Misc/NEWS.d/next/C_API/2025-04-14-07-41-28.gh-issue-131185.ZCjMHD.rst b/Misc/NEWS.d/next/C_API/2025-04-14-07-41-28.gh-issue-131185.ZCjMHD.rst new file mode 100644 index 00000000000000..aa0e8bca93b46f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-04-14-07-41-28.gh-issue-131185.ZCjMHD.rst @@ -0,0 +1,2 @@ +:c:func:`PyGILState_Ensure` no longer crashes when called after interpreter +finalization. diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 8e0e330f6605c9..577da65c7cdafa 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -8,6 +8,7 @@ #include #include "pycore_initconfig.h" // _PyConfig_InitCompatConfig() #include "pycore_runtime.h" // _PyRuntime +#include "pycore_lock.h" // PyEvent #include "pycore_pythread.h" // PyThread_start_joinable_thread() #include "pycore_import.h" // _PyImport_FrozenBootstrap #include @@ -2312,6 +2313,32 @@ test_get_incomplete_frame(void) return result; } +static void +do_gilstate_ensure(void *event_ptr) +{ + PyEvent *event = (PyEvent *)event_ptr; + // Signal to the calling thread that we've started + _PyEvent_Notify(event); + PyGILState_Ensure(); // This should hang + assert(NULL); +} + +static int +test_gilstate_after_finalization(void) +{ + _testembed_initialize(); + Py_Finalize(); + PyThread_handle_t handle; + PyThread_ident_t ident; + PyEvent event = {0}; + if (PyThread_start_joinable_thread(&do_gilstate_ensure, &event, &ident, &handle) < 0) { + return -1; + } + PyEvent_Wait(&event); + // We're now pretty confident that the thread went for + // PyGILState_Ensure(), but that means it got hung. + return PyThread_detach_thread(handle); +} /* ********************************************************* * List of test cases and the function that implements it. @@ -2402,7 +2429,7 @@ static struct TestCase TestCases[] = { {"test_frozenmain", test_frozenmain}, #endif {"test_get_incomplete_frame", test_get_incomplete_frame}, - + {"test_gilstate_after_finalization", test_gilstate_after_finalization}, {NULL, NULL} }; diff --git a/Python/pystate.c b/Python/pystate.c index 4757a8c3d1476c..4144e6edefc073 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -69,7 +69,12 @@ to avoid the expense of doing their own locking). #ifdef HAVE_THREAD_LOCAL +/* The attached thread state for the current thread. */ _Py_thread_local PyThreadState *_Py_tss_tstate = NULL; + +/* The "bound" thread state used by PyGILState_Ensure(), + also known as a "gilstate." */ +_Py_thread_local PyThreadState *_Py_tss_gilstate = NULL; #endif static inline PyThreadState * @@ -118,79 +123,9 @@ _PyThreadState_GetCurrent(void) } -//------------------------------------------------ -// the thread state bound to the current OS thread -//------------------------------------------------ - -static inline int -tstate_tss_initialized(Py_tss_t *key) -{ - return PyThread_tss_is_created(key); -} - -static inline int -tstate_tss_init(Py_tss_t *key) -{ - assert(!tstate_tss_initialized(key)); - return PyThread_tss_create(key); -} - -static inline void -tstate_tss_fini(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - PyThread_tss_delete(key); -} - -static inline PyThreadState * -tstate_tss_get(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - return (PyThreadState *)PyThread_tss_get(key); -} - -static inline int -tstate_tss_set(Py_tss_t *key, PyThreadState *tstate) -{ - assert(tstate != NULL); - assert(tstate_tss_initialized(key)); - return PyThread_tss_set(key, (void *)tstate); -} - -static inline int -tstate_tss_clear(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - return PyThread_tss_set(key, (void *)NULL); -} - -#ifdef HAVE_FORK -/* Reset the TSS key - called by PyOS_AfterFork_Child(). - * This should not be necessary, but some - buggy - pthread implementations - * don't reset TSS upon fork(), see issue #10517. - */ -static PyStatus -tstate_tss_reinit(Py_tss_t *key) -{ - if (!tstate_tss_initialized(key)) { - return _PyStatus_OK(); - } - PyThreadState *tstate = tstate_tss_get(key); - - tstate_tss_fini(key); - if (tstate_tss_init(key) != 0) { - return _PyStatus_NO_MEMORY(); - } - - /* If the thread had an associated auto thread state, reassociate it with - * the new key. */ - if (tstate && tstate_tss_set(key, tstate) != 0) { - return _PyStatus_ERR("failed to re-set autoTSSkey"); - } - return _PyStatus_OK(); -} -#endif - +//--------------------------------------------- +// The thread state used by PyGILState_Ensure() +//--------------------------------------------- /* The stored thread state is set by bind_tstate() (AKA PyThreadState_Bind(). @@ -198,36 +133,23 @@ tstate_tss_reinit(Py_tss_t *key) The GIL does no need to be held for these. */ -#define gilstate_tss_initialized(runtime) \ - tstate_tss_initialized(&(runtime)->autoTSSkey) -#define gilstate_tss_init(runtime) \ - tstate_tss_init(&(runtime)->autoTSSkey) -#define gilstate_tss_fini(runtime) \ - tstate_tss_fini(&(runtime)->autoTSSkey) -#define gilstate_tss_get(runtime) \ - tstate_tss_get(&(runtime)->autoTSSkey) -#define _gilstate_tss_set(runtime, tstate) \ - tstate_tss_set(&(runtime)->autoTSSkey, tstate) -#define _gilstate_tss_clear(runtime) \ - tstate_tss_clear(&(runtime)->autoTSSkey) -#define gilstate_tss_reinit(runtime) \ - tstate_tss_reinit(&(runtime)->autoTSSkey) +static inline PyThreadState * +gilstate_get(void) +{ + return _Py_tss_gilstate; +} static inline void -gilstate_tss_set(_PyRuntimeState *runtime, PyThreadState *tstate) +gilstate_set(PyThreadState *tstate) { - assert(tstate != NULL && tstate->interp->runtime == runtime); - if (_gilstate_tss_set(runtime, tstate) != 0) { - Py_FatalError("failed to set current tstate (TSS)"); - } + assert(tstate != NULL); + _Py_tss_gilstate = tstate; } static inline void -gilstate_tss_clear(_PyRuntimeState *runtime) +gilstate_clear(void) { - if (_gilstate_tss_clear(runtime) != 0) { - Py_FatalError("failed to clear current tstate (TSS)"); - } + _Py_tss_gilstate = NULL; } @@ -253,7 +175,7 @@ bind_tstate(PyThreadState *tstate) assert(tstate_is_alive(tstate) && !tstate->_status.bound); assert(!tstate->_status.unbound); // just in case assert(!tstate->_status.bound_gilstate); - assert(tstate != gilstate_tss_get(tstate->interp->runtime)); + assert(tstate != gilstate_get()); assert(!tstate->_status.active); assert(tstate->thread_id == 0); assert(tstate->native_thread_id == 0); @@ -328,14 +250,13 @@ bind_gilstate_tstate(PyThreadState *tstate) // XXX assert(!tstate->_status.active); assert(!tstate->_status.bound_gilstate); - _PyRuntimeState *runtime = tstate->interp->runtime; - PyThreadState *tcur = gilstate_tss_get(runtime); + PyThreadState *tcur = gilstate_get(); assert(tstate != tcur); if (tcur != NULL) { tcur->_status.bound_gilstate = 0; } - gilstate_tss_set(runtime, tstate); + gilstate_set(tstate); tstate->_status.bound_gilstate = 1; } @@ -347,9 +268,8 @@ unbind_gilstate_tstate(PyThreadState *tstate) assert(tstate_is_bound(tstate)); // XXX assert(!tstate->_status.active); assert(tstate->_status.bound_gilstate); - assert(tstate == gilstate_tss_get(tstate->interp->runtime)); - - gilstate_tss_clear(tstate->interp->runtime); + assert(tstate == gilstate_get()); + gilstate_clear(); tstate->_status.bound_gilstate = 0; } @@ -373,7 +293,7 @@ holds_gil(PyThreadState *tstate) // (and tstate->interp->runtime->ceval.gil.locked). assert(tstate != NULL); /* Must be the tstate for this thread */ - assert(tstate == gilstate_tss_get(tstate->interp->runtime)); + assert(tstate == gilstate_get()); return tstate == current_fast_get(); } @@ -469,16 +389,6 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) return status; } - if (gilstate_tss_init(runtime) != 0) { - _PyRuntimeState_Fini(runtime); - return _PyStatus_NO_MEMORY(); - } - - if (PyThread_tss_create(&runtime->trashTSSkey) != 0) { - _PyRuntimeState_Fini(runtime); - return _PyStatus_NO_MEMORY(); - } - init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head, unicode_next_index); @@ -492,14 +402,7 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) /* The count is cleared by _Py_FinalizeRefTotal(). */ assert(runtime->object_state.interpreter_leaks == 0); #endif - - if (gilstate_tss_initialized(runtime)) { - gilstate_tss_fini(runtime); - } - - if (PyThread_tss_is_created(&runtime->trashTSSkey)) { - PyThread_tss_delete(&runtime->trashTSSkey); - } + gilstate_clear(); } #ifdef HAVE_FORK @@ -532,18 +435,6 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyTypes_AfterFork(); - PyStatus status = gilstate_tss_reinit(runtime); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - - if (PyThread_tss_is_created(&runtime->trashTSSkey)) { - PyThread_tss_delete(&runtime->trashTSSkey); - } - if (PyThread_tss_create(&runtime->trashTSSkey) != 0) { - return _PyStatus_NO_MEMORY(); - } - _PyThread_AfterFork(&runtime->threads); return _PyStatus_OK(); @@ -1669,7 +1560,7 @@ _PyThreadState_NewBound(PyInterpreterState *interp, int whence) bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. - if (gilstate_tss_get(tstate->interp->runtime) == NULL) { + if (gilstate_get() == NULL) { bind_gilstate_tstate(tstate); } } @@ -2095,7 +1986,7 @@ tstate_activate(PyThreadState *tstate) assert(!tstate->_status.active); assert(!tstate->_status.bound_gilstate || - tstate == gilstate_tss_get((tstate->interp->runtime))); + tstate == gilstate_get()); if (!tstate->_status.bound_gilstate) { bind_gilstate_tstate(tstate); } @@ -2563,7 +2454,7 @@ _PyThreadState_Bind(PyThreadState *tstate) bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. - if (gilstate_tss_get(tstate->interp->runtime) == NULL) { + if (gilstate_get() == NULL) { bind_gilstate_tstate(tstate); } } @@ -2765,7 +2656,7 @@ _PyGILState_Init(PyInterpreterState *interp) return _PyStatus_OK(); } _PyRuntimeState *runtime = interp->runtime; - assert(gilstate_tss_get(runtime) == NULL); + assert(gilstate_get() == NULL); assert(runtime->gilstate.autoInterpreterState == NULL); runtime->gilstate.autoInterpreterState = interp; return _PyStatus_OK(); @@ -2801,7 +2692,7 @@ _PyGILState_SetTstate(PyThreadState *tstate) _PyRuntimeState *runtime = tstate->interp->runtime; assert(runtime->gilstate.autoInterpreterState == tstate->interp); - assert(gilstate_tss_get(runtime) == tstate); + assert(gilstate_get() == tstate); assert(tstate->gilstate_counter == 1); #endif } @@ -2817,11 +2708,7 @@ _PyGILState_GetInterpreterStateUnsafe(void) PyThreadState * PyGILState_GetThisThreadState(void) { - _PyRuntimeState *runtime = &_PyRuntime; - if (!gilstate_tss_initialized(runtime)) { - return NULL; - } - return gilstate_tss_get(runtime); + return gilstate_get(); } int @@ -2832,16 +2719,12 @@ PyGILState_Check(void) return 1; } - if (!gilstate_tss_initialized(runtime)) { - return 1; - } - PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return 0; } - PyThreadState *tcur = gilstate_tss_get(runtime); + PyThreadState *tcur = gilstate_get(); return (tstate == tcur); } @@ -2856,12 +2739,17 @@ PyGILState_Ensure(void) called Py_Initialize(). */ /* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been - called by Py_Initialize() */ - assert(_PyEval_ThreadsInitialized()); - assert(gilstate_tss_initialized(runtime)); - assert(runtime->gilstate.autoInterpreterState != NULL); + called by Py_Initialize() - PyThreadState *tcur = gilstate_tss_get(runtime); + TODO: This isn't thread-safe. There's no protection here against + concurrent finalization of the interpreter; it's simply a guard + for *after* the interpreter has finalized. + */ + if (!_PyEval_ThreadsInitialized() || runtime->gilstate.autoInterpreterState == NULL) { + PyThread_hang_thread(); + } + + PyThreadState *tcur = gilstate_get(); int has_gil; if (tcur == NULL) { /* Create a new Python thread state for this thread */ @@ -2901,8 +2789,7 @@ PyGILState_Ensure(void) void PyGILState_Release(PyGILState_STATE oldstate) { - _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = gilstate_tss_get(runtime); + PyThreadState *tstate = gilstate_get(); if (tstate == NULL) { Py_FatalError("auto-releasing thread-state, " "but no thread-state for this thread"); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index b128abca39fb41..15b18f5286b399 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -191,6 +191,7 @@ Python/pyfpe.c - PyFPE_counter - Python/import.c - pkgcontext - Python/pystate.c - _Py_tss_tstate - +Python/pystate.c - _Py_tss_gilstate - ##----------------------- ## should be const From 0c5a8b0b55238a45b9073d06a10c3a59568cdf3c Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 21 May 2025 09:14:36 -0400 Subject: [PATCH 261/989] gh-134262: Add retries to generate_sbom.py (#134263) Co-authored-by: Semyon Moroz --- Tools/build/generate_sbom.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index db01426e9722c3..5845f2d85c7fdb 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -7,7 +7,9 @@ import re import subprocess import sys +import time import typing +import urllib.error import urllib.request from pathlib import Path, PurePosixPath, PureWindowsPath @@ -161,6 +163,21 @@ def get_externals() -> list[str]: return externals +def download_with_retries(download_location: str, + max_retries: int = 5, + base_delay: float = 2.0) -> typing.Any: + """Download a file with exponential backoff retry.""" + for attempt in range(max_retries): + try: + resp = urllib.request.urlopen(download_location) + except urllib.error.URLError as ex: + if attempt == max_retries: + raise ex + time.sleep(base_delay**attempt) + else: + return resp + + def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: """Make a bunch of assertions about the SBOM package data to ensure it's consistent.""" @@ -175,7 +192,7 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: # and that the download URL is valid. if "checksums" not in package or "CI" in os.environ: download_location = package["downloadLocation"] - resp = urllib.request.urlopen(download_location) + resp = download_with_retries(download_location) error_if(resp.status != 200, f"Couldn't access URL: {download_location}'") package["checksums"] = [{ From 88f8102a8fd4a8dd81457f01507e9a8693b1d4b7 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 21 May 2025 07:23:48 -0600 Subject: [PATCH 262/989] gh-132775: Support Fallbacks in _PyObject_GetXIData() (gh-133482) It now supports a "full" fallback to _PyFunction_GetXIData() and then `_PyPickle_GetXIData()`. There's also room for other fallback modes if that later makes sense. --- Include/internal/pycore_crossinterp.h | 25 +- .../pycore_crossinterp_data_registry.h | 4 +- Lib/test/support/import_helper.py | 2 +- Lib/test/test_crossinterp.py | 511 +++++++++++++++--- Modules/_interpchannelsmodule.c | 8 +- Modules/_interpqueuesmodule.c | 4 +- Modules/_interpreters_common.h | 4 +- Modules/_interpretersmodule.c | 2 +- Modules/_testinternalcapi.c | 9 +- Python/crossinterp.c | 89 ++- Python/crossinterp_data_lookup.h | 62 ++- 11 files changed, 581 insertions(+), 139 deletions(-) diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 19c55dd65983d7..45fa47d62c78a3 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -131,7 +131,23 @@ PyAPI_FUNC(void) _PyXIData_Clear(PyInterpreterState *, _PyXIData_t *); /* getting cross-interpreter data */ -typedef int (*xidatafunc)(PyThreadState *tstate, PyObject *, _PyXIData_t *); +typedef int xidata_fallback_t; +#define _PyXIDATA_XIDATA_ONLY (0) +#define _PyXIDATA_FULL_FALLBACK (1) + +// Technically, we don't need two different function types; +// we could go with just the fallback one. However, only container +// types like tuple need it, so always having the extra arg would be +// a bit unfortunate. It's also nice to be able to clearly distinguish +// between types that might call _PyObject_GetXIData() and those that won't. +// +typedef int (*xidatafunc)(PyThreadState *, PyObject *, _PyXIData_t *); +typedef int (*xidatafbfunc)( + PyThreadState *, PyObject *, xidata_fallback_t, _PyXIData_t *); +typedef struct { + xidatafunc basic; + xidatafbfunc fallback; +} _PyXIData_getdata_t; PyAPI_FUNC(PyObject *) _PyXIData_GetNotShareableErrorType(PyThreadState *); PyAPI_FUNC(void) _PyXIData_SetNotShareableError(PyThreadState *, const char *); @@ -140,16 +156,21 @@ PyAPI_FUNC(void) _PyXIData_FormatNotShareableError( const char *, ...); -PyAPI_FUNC(xidatafunc) _PyXIData_Lookup( +PyAPI_FUNC(_PyXIData_getdata_t) _PyXIData_Lookup( PyThreadState *, PyObject *); PyAPI_FUNC(int) _PyObject_CheckXIData( PyThreadState *, PyObject *); +PyAPI_FUNC(int) _PyObject_GetXIDataNoFallback( + PyThreadState *, + PyObject *, + _PyXIData_t *); PyAPI_FUNC(int) _PyObject_GetXIData( PyThreadState *, PyObject *, + xidata_fallback_t, _PyXIData_t *); // _PyObject_GetXIData() for bytes diff --git a/Include/internal/pycore_crossinterp_data_registry.h b/Include/internal/pycore_crossinterp_data_registry.h index 8f4bcb948e5a45..fbb4cad5cac32e 100644 --- a/Include/internal/pycore_crossinterp_data_registry.h +++ b/Include/internal/pycore_crossinterp_data_registry.h @@ -17,7 +17,7 @@ typedef struct _xid_regitem { /* This is NULL for builtin types. */ PyObject *weakref; size_t refcount; - xidatafunc getdata; + _PyXIData_getdata_t getdata; } _PyXIData_regitem_t; typedef struct { @@ -30,7 +30,7 @@ typedef struct { PyAPI_FUNC(int) _PyXIData_RegisterClass( PyThreadState *, PyTypeObject *, - xidatafunc); + _PyXIData_getdata_t); PyAPI_FUNC(int) _PyXIData_UnregisterClass( PyThreadState *, PyTypeObject *); diff --git a/Lib/test/support/import_helper.py b/Lib/test/support/import_helper.py index edb734d294f287..0af63501f93bc8 100644 --- a/Lib/test/support/import_helper.py +++ b/Lib/test/support/import_helper.py @@ -438,5 +438,5 @@ def ensure_module_imported(name, *, clearnone=True): if sys.modules.get(name) is not None: mod = sys.modules[name] else: - mod, _, _ = _force_import(name, False, True, clearnone) + mod, _, _ = _ensure_module(name, False, True, clearnone) return mod diff --git a/Lib/test/test_crossinterp.py b/Lib/test/test_crossinterp.py index cddacbc9970052..c54635eaeab3f9 100644 --- a/Lib/test/test_crossinterp.py +++ b/Lib/test/test_crossinterp.py @@ -5,6 +5,7 @@ import sys import types import unittest +import warnings from test.support import import_helper @@ -16,13 +17,281 @@ from test import _crossinterp_definitions as defs -BUILTIN_TYPES = [o for _, o in __builtins__.items() - if isinstance(o, type)] -EXCEPTION_TYPES = [cls for cls in BUILTIN_TYPES +@contextlib.contextmanager +def ignore_byteswarning(): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=BytesWarning) + yield + + +# builtin types + +BUILTINS_TYPES = [o for _, o in __builtins__.items() if isinstance(o, type)] +EXCEPTION_TYPES = [cls for cls in BUILTINS_TYPES if issubclass(cls, BaseException)] OTHER_TYPES = [o for n, o in vars(types).items() if (isinstance(o, type) and - n not in ('DynamicClassAttribute', '_GeneratorWrapper'))] + n not in ('DynamicClassAttribute', '_GeneratorWrapper'))] +BUILTIN_TYPES = [ + *BUILTINS_TYPES, + *OTHER_TYPES, +] + +# builtin exceptions + +try: + raise Exception +except Exception as exc: + CAUGHT = exc +EXCEPTIONS_WITH_SPECIAL_SIG = { + BaseExceptionGroup: (lambda msg: (msg, [CAUGHT])), + ExceptionGroup: (lambda msg: (msg, [CAUGHT])), + UnicodeError: (lambda msg: (None, msg, None, None, None)), + UnicodeEncodeError: (lambda msg: ('utf-8', '', 1, 3, msg)), + UnicodeDecodeError: (lambda msg: ('utf-8', b'', 1, 3, msg)), + UnicodeTranslateError: (lambda msg: ('', 1, 3, msg)), +} +BUILTIN_EXCEPTIONS = [ + *(cls(*sig('error!')) for cls, sig in EXCEPTIONS_WITH_SPECIAL_SIG.items()), + *(cls('error!') for cls in EXCEPTION_TYPES + if cls not in EXCEPTIONS_WITH_SPECIAL_SIG), +] + +# other builtin objects + +METHOD = defs.SpamOkay().okay +BUILTIN_METHOD = [].append +METHOD_DESCRIPTOR_WRAPPER = str.join +METHOD_WRAPPER = object().__str__ +WRAPPER_DESCRIPTOR = object.__init__ +BUILTIN_WRAPPERS = { + METHOD: types.MethodType, + BUILTIN_METHOD: types.BuiltinMethodType, + dict.__dict__['fromkeys']: types.ClassMethodDescriptorType, + types.FunctionType.__code__: types.GetSetDescriptorType, + types.FunctionType.__globals__: types.MemberDescriptorType, + METHOD_DESCRIPTOR_WRAPPER: types.MethodDescriptorType, + METHOD_WRAPPER: types.MethodWrapperType, + WRAPPER_DESCRIPTOR: types.WrapperDescriptorType, + staticmethod(defs.SpamOkay.okay): None, + classmethod(defs.SpamOkay.okay): None, + property(defs.SpamOkay.okay): None, +} +BUILTIN_FUNCTIONS = [ + # types.BuiltinFunctionType + len, + sys.is_finalizing, + sys.exit, + _testinternalcapi.get_crossinterp_data, +] +assert 'emptymod' not in sys.modules +with import_helper.ready_to_import('emptymod', ''): + import emptymod as EMPTYMOD +MODULES = [ + sys, + defs, + unittest, + EMPTYMOD, +] +OBJECT = object() +EXCEPTION = Exception() +LAMBDA = (lambda: None) +BUILTIN_SIMPLE = [ + OBJECT, + # singletons + None, + True, + False, + Ellipsis, + NotImplemented, + # bytes + *(i.to_bytes(2, 'little', signed=True) + for i in range(-1, 258)), + # str + 'hello world', + '你好世界', + '', + # int + sys.maxsize + 1, + sys.maxsize, + -sys.maxsize - 1, + -sys.maxsize - 2, + *range(-1, 258), + 2**1000, + # float + 0.0, + 1.1, + -1.0, + 0.12345678, + -0.12345678, +] +TUPLE_EXCEPTION = (0, 1.0, EXCEPTION) +TUPLE_OBJECT = (0, 1.0, OBJECT) +TUPLE_NESTED_EXCEPTION = (0, 1.0, (EXCEPTION,)) +TUPLE_NESTED_OBJECT = (0, 1.0, (OBJECT,)) +MEMORYVIEW_EMPTY = memoryview(b'') +MEMORYVIEW_NOT_EMPTY = memoryview(b'spam'*42) +MAPPING_PROXY_EMPTY = types.MappingProxyType({}) +BUILTIN_CONTAINERS = [ + # tuple (flat) + (), + (1,), + ("hello", "world", ), + (1, True, "hello"), + TUPLE_EXCEPTION, + TUPLE_OBJECT, + # tuple (nested) + ((1,),), + ((1, 2), (3, 4)), + ((1, 2), (3, 4), (5, 6)), + TUPLE_NESTED_EXCEPTION, + TUPLE_NESTED_OBJECT, + # buffer + MEMORYVIEW_EMPTY, + MEMORYVIEW_NOT_EMPTY, + # list + [], + [1, 2, 3], + [[1], (2,), {3: 4}], + # dict + {}, + {1: 7, 2: 8, 3: 9}, + {1: [1], 2: (2,), 3: {3: 4}}, + # set + set(), + {1, 2, 3}, + {frozenset({1}), (2,)}, + # frozenset + frozenset([]), + frozenset({frozenset({1}), (2,)}), + # bytearray + bytearray(b''), + # other + MAPPING_PROXY_EMPTY, + types.SimpleNamespace(), +] +ns = {} +exec(""" +try: + raise Exception +except Exception as exc: + TRACEBACK = exc.__traceback__ + FRAME = TRACEBACK.tb_frame +""", ns, ns) +BUILTIN_OTHER = [ + # types.CellType + types.CellType(), + # types.FrameType + ns['FRAME'], + # types.TracebackType + ns['TRACEBACK'], +] +del ns + +# user-defined objects + +USER_TOP_INSTANCES = [c(*a) for c, a in defs.TOP_CLASSES.items()] +USER_NESTED_INSTANCES = [c(*a) for c, a in defs.NESTED_CLASSES.items()] +USER_INSTANCES = [ + *USER_TOP_INSTANCES, + *USER_NESTED_INSTANCES, +] +USER_EXCEPTIONS = [ + defs.MimimalError('error!'), +] + +# shareable objects + +TUPLES_WITHOUT_EQUALITY = [ + TUPLE_EXCEPTION, + TUPLE_OBJECT, + TUPLE_NESTED_EXCEPTION, + TUPLE_NESTED_OBJECT, +] +_UNSHAREABLE_SIMPLE = [ + Ellipsis, + NotImplemented, + OBJECT, + sys.maxsize + 1, + -sys.maxsize - 2, + 2**1000, +] +with ignore_byteswarning(): + _SHAREABLE_SIMPLE = [o for o in BUILTIN_SIMPLE + if o not in _UNSHAREABLE_SIMPLE] + _SHAREABLE_CONTAINERS = [ + *(o for o in BUILTIN_CONTAINERS if type(o) is memoryview), + *(o for o in BUILTIN_CONTAINERS + if type(o) is tuple and o not in TUPLES_WITHOUT_EQUALITY), + ] + _UNSHAREABLE_CONTAINERS = [o for o in BUILTIN_CONTAINERS + if o not in _SHAREABLE_CONTAINERS] +SHAREABLE = [ + *_SHAREABLE_SIMPLE, + *_SHAREABLE_CONTAINERS, +] +NOT_SHAREABLE = [ + *_UNSHAREABLE_SIMPLE, + *_UNSHAREABLE_CONTAINERS, + *BUILTIN_TYPES, + *BUILTIN_WRAPPERS, + *BUILTIN_EXCEPTIONS, + *BUILTIN_FUNCTIONS, + *MODULES, + *BUILTIN_OTHER, + # types.CodeType + *(f.__code__ for f in defs.FUNCTIONS), + *(f.__code__ for f in defs.FUNCTION_LIKE), + # types.FunctionType + *defs.FUNCTIONS, + defs.SpamOkay.okay, + LAMBDA, + *defs.FUNCTION_LIKE, + # coroutines and generators + *defs.FUNCTION_LIKE_APPLIED, + # user classes + *defs.CLASSES, + *USER_INSTANCES, + # user exceptions + *USER_EXCEPTIONS, +] + +# pickleable objects + +PICKLEABLE = [ + *BUILTIN_SIMPLE, + *(o for o in BUILTIN_CONTAINERS if o not in [ + MEMORYVIEW_EMPTY, + MEMORYVIEW_NOT_EMPTY, + MAPPING_PROXY_EMPTY, + ] or type(o) is dict), + *BUILTINS_TYPES, + *BUILTIN_EXCEPTIONS, + *BUILTIN_FUNCTIONS, + *defs.TOP_FUNCTIONS, + defs.SpamOkay.okay, + *defs.FUNCTION_LIKE, + *defs.TOP_CLASSES, + *USER_TOP_INSTANCES, + *USER_EXCEPTIONS, + # from OTHER_TYPES + types.NoneType, + types.EllipsisType, + types.NotImplementedType, + types.GenericAlias, + types.UnionType, + types.SimpleNamespace, + # from BUILTIN_WRAPPERS + METHOD, + BUILTIN_METHOD, + METHOD_DESCRIPTOR_WRAPPER, + METHOD_WRAPPER, + WRAPPER_DESCRIPTOR, +] +assert not any(isinstance(o, types.MappingProxyType) for o in PICKLEABLE) + + +# helpers DEFS = defs with open(code_defs.__file__) as infile: @@ -111,6 +380,77 @@ class _GetXIDataTests(unittest.TestCase): MODE = None + def assert_functions_equal(self, func1, func2): + assert type(func1) is types.FunctionType, repr(func1) + assert type(func2) is types.FunctionType, repr(func2) + self.assertEqual(func1.__name__, func2.__name__) + self.assertEqual(func1.__code__, func2.__code__) + self.assertEqual(func1.__defaults__, func2.__defaults__) + self.assertEqual(func1.__kwdefaults__, func2.__kwdefaults__) + # We don't worry about __globals__ for now. + + def assert_exc_args_equal(self, exc1, exc2): + args1 = exc1.args + args2 = exc2.args + if isinstance(exc1, ExceptionGroup): + self.assertIs(type(args1), type(args2)) + self.assertEqual(len(args1), 2) + self.assertEqual(len(args1), len(args2)) + self.assertEqual(args1[0], args2[0]) + group1 = args1[1] + group2 = args2[1] + self.assertEqual(len(group1), len(group2)) + for grouped1, grouped2 in zip(group1, group2): + # Currently the "extra" attrs are not preserved + # (via __reduce__). + self.assertIs(type(exc1), type(exc2)) + self.assert_exc_equal(grouped1, grouped2) + else: + self.assertEqual(args1, args2) + + def assert_exc_equal(self, exc1, exc2): + self.assertIs(type(exc1), type(exc2)) + + if type(exc1).__eq__ is not object.__eq__: + self.assertEqual(exc1, exc2) + + self.assert_exc_args_equal(exc1, exc2) + # XXX For now we do not preserve tracebacks. + if exc1.__traceback__ is not None: + self.assertEqual(exc1.__traceback__, exc2.__traceback__) + self.assertEqual( + getattr(exc1, '__notes__', None), + getattr(exc2, '__notes__', None), + ) + # We assume there are no cycles. + if exc1.__cause__ is None: + self.assertIs(exc1.__cause__, exc2.__cause__) + else: + self.assert_exc_equal(exc1.__cause__, exc2.__cause__) + if exc1.__context__ is None: + self.assertIs(exc1.__context__, exc2.__context__) + else: + self.assert_exc_equal(exc1.__context__, exc2.__context__) + + def assert_equal_or_equalish(self, obj, expected): + cls = type(expected) + if cls.__eq__ is not object.__eq__: + self.assertEqual(obj, expected) + elif cls is types.FunctionType: + self.assert_functions_equal(obj, expected) + elif isinstance(expected, BaseException): + self.assert_exc_equal(obj, expected) + elif cls is types.MethodType: + raise NotImplementedError(cls) + elif cls is types.BuiltinMethodType: + raise NotImplementedError(cls) + elif cls is types.MethodWrapperType: + raise NotImplementedError(cls) + elif cls.__bases__ == (object,): + self.assertEqual(obj.__dict__, expected.__dict__) + else: + raise NotImplementedError(cls) + def get_xidata(self, obj, *, mode=None): mode = self._resolve_mode(mode) return _testinternalcapi.get_crossinterp_data(obj, mode) @@ -126,35 +466,37 @@ def _get_roundtrip(self, obj, mode): def assert_roundtrip_identical(self, values, *, mode=None): mode = self._resolve_mode(mode) for obj in values: - with self.subTest(obj): + with self.subTest(repr(obj)): got = self._get_roundtrip(obj, mode) self.assertIs(got, obj) def assert_roundtrip_equal(self, values, *, mode=None, expecttype=None): mode = self._resolve_mode(mode) for obj in values: - with self.subTest(obj): + with self.subTest(repr(obj)): got = self._get_roundtrip(obj, mode) - self.assertEqual(got, obj) + if got is obj: + continue self.assertIs(type(got), type(obj) if expecttype is None else expecttype) + self.assert_equal_or_equalish(got, obj) def assert_roundtrip_equal_not_identical(self, values, *, mode=None, expecttype=None): mode = self._resolve_mode(mode) for obj in values: - with self.subTest(obj): + with self.subTest(repr(obj)): got = self._get_roundtrip(obj, mode) self.assertIsNot(got, obj) self.assertIs(type(got), type(obj) if expecttype is None else expecttype) - self.assertEqual(got, obj) + self.assert_equal_or_equalish(got, obj) def assert_roundtrip_not_equal(self, values, *, mode=None, expecttype=None): mode = self._resolve_mode(mode) for obj in values: - with self.subTest(obj): + with self.subTest(repr(obj)): got = self._get_roundtrip(obj, mode) self.assertIsNot(got, obj) self.assertIs(type(got), @@ -164,7 +506,7 @@ def assert_roundtrip_not_equal(self, values, *, def assert_not_shareable(self, values, exctype=None, *, mode=None): mode = self._resolve_mode(mode) for obj in values: - with self.subTest(obj): + with self.subTest(repr(obj)): with self.assertRaises(NotShareableError) as cm: _testinternalcapi.get_crossinterp_data(obj, mode) if exctype is not None: @@ -182,49 +524,26 @@ class PickleTests(_GetXIDataTests): MODE = 'pickle' def test_shareable(self): - self.assert_roundtrip_equal([ - # singletons - None, - True, - False, - # bytes - *(i.to_bytes(2, 'little', signed=True) - for i in range(-1, 258)), - # str - 'hello world', - '你好世界', - '', - # int - sys.maxsize, - -sys.maxsize - 1, - *range(-1, 258), - # float - 0.0, - 1.1, - -1.0, - 0.12345678, - -0.12345678, - # tuple - (), - (1,), - ("hello", "world", ), - (1, True, "hello"), - ((1,),), - ((1, 2), (3, 4)), - ((1, 2), (3, 4), (5, 6)), - ]) - # not shareable using xidata - self.assert_roundtrip_equal([ - # int - sys.maxsize + 1, - -sys.maxsize - 2, - 2**1000, - # tuple - (0, 1.0, []), - (0, 1.0, {}), - (0, 1.0, ([],)), - (0, 1.0, ({},)), - ]) + with ignore_byteswarning(): + for obj in SHAREABLE: + if obj in PICKLEABLE: + self.assert_roundtrip_equal([obj]) + else: + self.assert_not_shareable([obj]) + + def test_not_shareable(self): + with ignore_byteswarning(): + for obj in NOT_SHAREABLE: + if type(obj) is types.MappingProxyType: + self.assert_not_shareable([obj]) + elif obj in PICKLEABLE: + with self.subTest(repr(obj)): + # We don't worry about checking the actual value. + # The other tests should cover that well enough. + got = self.get_roundtrip(obj) + self.assertIs(type(got), type(obj)) + else: + self.assert_not_shareable([obj]) def test_list(self): self.assert_roundtrip_equal_not_identical([ @@ -266,7 +585,7 @@ def assert_class_defs_same(self, defs): if cls not in defs.CLASSES_WITHOUT_EQUALITY: continue instances.append(cls(*args)) - self.assert_roundtrip_not_equal(instances) + self.assert_roundtrip_equal(instances) def assert_class_defs_other_pickle(self, defs, mod): # Pickle relative to a different module than the original. @@ -286,7 +605,7 @@ def assert_class_defs_other_unpickle(self, defs, mod, *, fail=False): instances = [] for cls, args in defs.TOP_CLASSES.items(): - with self.subTest(cls): + with self.subTest(repr(cls)): setattr(mod, cls.__name__, cls) xid = self.get_xidata(cls) inst = cls(*args) @@ -295,7 +614,7 @@ def assert_class_defs_other_unpickle(self, defs, mod, *, fail=False): (cls, xid, inst, instxid)) for cls, xid, inst, instxid in instances: - with self.subTest(cls): + with self.subTest(repr(cls)): delattr(mod, cls.__name__) if fail: with self.assertRaises(NotShareableError): @@ -403,13 +722,13 @@ def assert_func_defs_same(self, defs): def assert_func_defs_other_pickle(self, defs, mod): # Pickle relative to a different module than the original. for func in defs.TOP_FUNCTIONS: - assert not hasattr(mod, func.__name__), (cls, getattr(mod, func.__name__)) + assert not hasattr(mod, func.__name__), (getattr(mod, func.__name__),) self.assert_not_shareable(defs.TOP_FUNCTIONS) def assert_func_defs_other_unpickle(self, defs, mod, *, fail=False): # Unpickle relative to a different module than the original. for func in defs.TOP_FUNCTIONS: - assert not hasattr(mod, func.__name__), (cls, getattr(mod, func.__name__)) + assert not hasattr(mod, func.__name__), (getattr(mod, func.__name__),) captured = [] for func in defs.TOP_FUNCTIONS: @@ -434,7 +753,7 @@ def assert_func_defs_not_shareable(self, defs): self.assert_not_shareable(defs.TOP_FUNCTIONS) def test_user_function_normal(self): -# self.assert_roundtrip_equal(defs.TOP_FUNCTIONS) + self.assert_roundtrip_equal(defs.TOP_FUNCTIONS) self.assert_func_defs_same(defs) def test_user_func_in___main__(self): @@ -505,7 +824,7 @@ def test_nested_function(self): # exceptions def test_user_exception_normal(self): - self.assert_roundtrip_not_equal([ + self.assert_roundtrip_equal([ defs.MimimalError('error!'), ]) self.assert_roundtrip_equal_not_identical([ @@ -521,7 +840,7 @@ def test_builtin_exception(self): special = { BaseExceptionGroup: (msg, [caught]), ExceptionGroup: (msg, [caught]), -# UnicodeError: (None, msg, None, None, None), + UnicodeError: (None, msg, None, None, None), UnicodeEncodeError: ('utf-8', '', 1, 3, msg), UnicodeDecodeError: ('utf-8', b'', 1, 3, msg), UnicodeTranslateError: ('', 1, 3, msg), @@ -531,7 +850,7 @@ def test_builtin_exception(self): args = special.get(cls) or (msg,) exceptions.append(cls(*args)) - self.assert_roundtrip_not_equal(exceptions) + self.assert_roundtrip_equal(exceptions) class MarshalTests(_GetXIDataTests): @@ -576,7 +895,7 @@ def test_simple_builtin_objects(self): '', ]) self.assert_not_shareable([ - object(), + OBJECT, types.SimpleNamespace(), ]) @@ -647,10 +966,7 @@ def test_builtin_type(self): shareable = [ StopIteration, ] - types = [ - *BUILTIN_TYPES, - *OTHER_TYPES, - ] + types = BUILTIN_TYPES self.assert_not_shareable(cls for cls in types if cls not in shareable) self.assert_roundtrip_identical(cls for cls in types @@ -763,7 +1079,7 @@ class ShareableFuncTests(_GetXIDataTests): MODE = 'func' def test_stateless(self): - self.assert_roundtrip_not_equal([ + self.assert_roundtrip_equal([ *defs.STATELESS_FUNCTIONS, # Generators can be stateless too. *defs.FUNCTION_LIKE, @@ -912,10 +1228,49 @@ def test_impure_script_function(self): ], expecttype=types.CodeType) +class ShareableFallbackTests(_GetXIDataTests): + + MODE = 'fallback' + + def test_shareable(self): + self.assert_roundtrip_equal(SHAREABLE) + + def test_not_shareable(self): + okay = [ + *PICKLEABLE, + *defs.STATELESS_FUNCTIONS, + LAMBDA, + ] + ignored = [ + *TUPLES_WITHOUT_EQUALITY, + OBJECT, + METHOD, + BUILTIN_METHOD, + METHOD_WRAPPER, + ] + with ignore_byteswarning(): + self.assert_roundtrip_equal([ + *(o for o in NOT_SHAREABLE + if o in okay and o not in ignored + and o is not MAPPING_PROXY_EMPTY), + ]) + self.assert_roundtrip_not_equal([ + *(o for o in NOT_SHAREABLE + if o in ignored and o is not MAPPING_PROXY_EMPTY), + ]) + self.assert_not_shareable([ + *(o for o in NOT_SHAREABLE if o not in okay), + MAPPING_PROXY_EMPTY, + ]) + + class ShareableTypeTests(_GetXIDataTests): MODE = 'xidata' + def test_shareable(self): + self.assert_roundtrip_equal(SHAREABLE) + def test_singletons(self): self.assert_roundtrip_identical([ None, @@ -983,8 +1338,8 @@ def test_tuple(self): def test_tuples_containing_non_shareable_types(self): non_shareables = [ - Exception(), - object(), + EXCEPTION, + OBJECT, ] for s in non_shareables: value = tuple([0, 1.0, s]) @@ -999,6 +1354,9 @@ def test_tuples_containing_non_shareable_types(self): # The rest are not shareable. + def test_not_shareable(self): + self.assert_not_shareable(NOT_SHAREABLE) + def test_object(self): self.assert_not_shareable([ object(), @@ -1015,12 +1373,12 @@ def test_function_object(self): for func in defs.FUNCTIONS: assert type(func) is types.FunctionType, func assert type(defs.SpamOkay.okay) is types.FunctionType, func - assert type(lambda: None) is types.LambdaType + assert type(LAMBDA) is types.LambdaType self.assert_not_shareable([ *defs.FUNCTIONS, defs.SpamOkay.okay, - (lambda: None), + LAMBDA, ]) def test_builtin_function(self): @@ -1085,10 +1443,7 @@ def test_class(self): self.assert_not_shareable(instances) def test_builtin_type(self): - self.assert_not_shareable([ - *BUILTIN_TYPES, - *OTHER_TYPES, - ]) + self.assert_not_shareable(BUILTIN_TYPES) def test_exception(self): self.assert_not_shareable([ @@ -1127,7 +1482,7 @@ def test_builtin_objects(self): """, ns, ns) self.assert_not_shareable([ - types.MappingProxyType({}), + MAPPING_PROXY_EMPTY, types.SimpleNamespace(), # types.CellType types.CellType(), diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 172cebcaa4884f..f9fa1dab291056 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -1779,7 +1779,7 @@ channel_send(_channels *channels, int64_t cid, PyObject *obj, PyThread_release_lock(mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, obj, data) != 0) { PyThread_release_lock(mutex); GLOBAL_FREE(data); return -1; @@ -2694,7 +2694,7 @@ add_channelid_type(PyObject *mod) Py_DECREF(cls); return NULL; } - if (ensure_xid_class(cls, _channelid_shared) < 0) { + if (ensure_xid_class(cls, GETDATA(_channelid_shared)) < 0) { Py_DECREF(cls); return NULL; } @@ -2797,12 +2797,12 @@ set_channelend_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv) // Add and register the types. state->send_channel_type = (PyTypeObject *)Py_NewRef(send); state->recv_channel_type = (PyTypeObject *)Py_NewRef(recv); - if (ensure_xid_class(send, _channelend_shared) < 0) { + if (ensure_xid_class(send, GETDATA(_channelend_shared)) < 0) { Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); return -1; } - if (ensure_xid_class(recv, _channelend_shared) < 0) { + if (ensure_xid_class(recv, GETDATA(_channelend_shared)) < 0) { (void)clear_xid_class(state->send_channel_type); Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 526249a0e1aec3..209fcdfd0cd01e 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1143,7 +1143,7 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) _queue_unmark_waiter(queue, queues->mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, obj, data) != 0) { _queue_unmark_waiter(queue, queues->mutex); GLOBAL_FREE(data); return -1; @@ -1270,7 +1270,7 @@ set_external_queue_type(module_state *state, PyTypeObject *queue_type) } // Add and register the new type. - if (ensure_xid_class(queue_type, _queueobj_shared) < 0) { + if (ensure_xid_class(queue_type, GETDATA(_queueobj_shared)) < 0) { return -1; } state->queue_type = (PyTypeObject *)Py_NewRef(queue_type); diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index edd65577284a20..d73cbca36359c7 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -5,8 +5,10 @@ _RESOLVE_MODINIT_FUNC_NAME(NAME) +#define GETDATA(FUNC) ((_PyXIData_getdata_t){.basic=FUNC}) + static int -ensure_xid_class(PyTypeObject *cls, xidatafunc getdata) +ensure_xid_class(PyTypeObject *cls, _PyXIData_getdata_t getdata) { PyThreadState *tstate = PyThreadState_Get(); return _PyXIData_RegisterClass(tstate, cls, getdata); diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 77678f7c126005..f3c571e717fd0e 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -286,7 +286,7 @@ register_memoryview_xid(PyObject *mod, PyTypeObject **p_state) *p_state = cls; // Register XID for the builtin memoryview type. - if (ensure_xid_class(&PyMemoryView_Type, _pybuffer_shared) < 0) { + if (ensure_xid_class(&PyMemoryView_Type, GETDATA(_pybuffer_shared)) < 0) { return -1; } // We don't ever bother un-registering memoryview. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 76bd76cc6b2490..136e6a7a015049 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1991,7 +1991,14 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } if (strcmp(mode, "xidata") == 0) { - if (_PyObject_GetXIData(tstate, obj, xidata) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "fallback") == 0) { + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) + { goto error; } } diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 725d6009f84014..dc67de4a40849d 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -210,16 +210,16 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ -/* registry of {type -> xidatafunc} */ +/* registry of {type -> _PyXIData_getdata_t} */ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. */ static void xid_lookup_init(_PyXIData_lookup_t *); static void xid_lookup_fini(_PyXIData_lookup_t *); struct _dlcontext; -static xidatafunc lookup_getdata(struct _dlcontext *, PyObject *); +static _PyXIData_getdata_t lookup_getdata(struct _dlcontext *, PyObject *); #include "crossinterp_data_lookup.h" @@ -343,7 +343,7 @@ _set_xid_lookup_failure(PyThreadState *tstate, PyObject *obj, const char *msg, set_notshareableerror(tstate, cause, 0, msg); } else { - msg = "%S does not support cross-interpreter data"; + msg = "%R does not support cross-interpreter data"; format_notshareableerror(tstate, cause, 0, msg, obj); } } @@ -356,8 +356,8 @@ _PyObject_CheckXIData(PyThreadState *tstate, PyObject *obj) if (get_lookup_context(tstate, &ctx) < 0) { return -1; } - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (!_PyErr_Occurred(tstate)) { _set_xid_lookup_failure(tstate, obj, NULL, NULL); } @@ -388,9 +388,9 @@ _check_xidata(PyThreadState *tstate, _PyXIData_t *xidata) return 0; } -int -_PyObject_GetXIData(PyThreadState *tstate, - PyObject *obj, _PyXIData_t *xidata) +static int +_get_xidata(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, _PyXIData_t *xidata) { PyInterpreterState *interp = tstate->interp; @@ -398,6 +398,7 @@ _PyObject_GetXIData(PyThreadState *tstate, assert(xidata->obj == NULL); if (xidata->data != NULL || xidata->obj != NULL) { _PyErr_SetString(tstate, PyExc_ValueError, "xidata not cleared"); + return -1; } // Call the "getdata" func for the object. @@ -406,8 +407,8 @@ _PyObject_GetXIData(PyThreadState *tstate, return -1; } Py_INCREF(obj); - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (PyErr_Occurred()) { Py_DECREF(obj); return -1; @@ -419,7 +420,9 @@ _PyObject_GetXIData(PyThreadState *tstate, } return -1; } - int res = getdata(tstate, obj, xidata); + int res = getdata.basic != NULL + ? getdata.basic(tstate, obj, xidata) + : getdata.fallback(tstate, obj, fallback, xidata); Py_DECREF(obj); if (res != 0) { PyObject *cause = _PyErr_GetRaisedException(tstate); @@ -439,6 +442,51 @@ _PyObject_GetXIData(PyThreadState *tstate, return 0; } +int +_PyObject_GetXIDataNoFallback(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return _get_xidata(tstate, obj, _PyXIDATA_XIDATA_ONLY, xidata); +} + +int +_PyObject_GetXIData(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) +{ + switch (fallback) { + case _PyXIDATA_XIDATA_ONLY: + return _get_xidata(tstate, obj, fallback, xidata); + case _PyXIDATA_FULL_FALLBACK: + if (_get_xidata(tstate, obj, fallback, xidata) == 0) { + return 0; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (PyFunction_Check(obj)) { + if (_PyFunction_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + _PyErr_Clear(tstate); + } + // We could try _PyMarshal_GetXIData() but we won't for now. + if (_PyPickle_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + // Raise the original exception. + _PyErr_SetRaisedException(tstate, exc); + return -1; + default: +#ifdef Py_DEBUG + Py_FatalError("unsupported xidata fallback option"); +#endif + _PyErr_SetString(tstate, PyExc_SystemError, + "unsupported xidata fallback option"); + return -1; + } +} + /* pickle C-API */ @@ -1617,14 +1665,9 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyThreadState *tstate = _PyThreadState_GET(); assert(!PyErr_Occurred()); + assert(code != _PyXI_ERR_NO_ERROR); + assert(code != _PyXI_ERR_UNCAUGHT_EXCEPTION); switch (code) { - case _PyXI_ERR_NO_ERROR: _Py_FALLTHROUGH; - case _PyXI_ERR_UNCAUGHT_EXCEPTION: - // There is nothing to apply. -#ifdef Py_DEBUG - Py_UNREACHABLE(); -#endif - return 0; case _PyXI_ERR_OTHER: // XXX msg? PyErr_SetNone(PyExc_InterpreterError); @@ -1649,7 +1692,7 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) break; default: #ifdef Py_DEBUG - Py_UNREACHABLE(); + Py_FatalError("unsupported error code"); #else PyErr_Format(PyExc_RuntimeError, "unsupported error code %d", code); #endif @@ -1796,7 +1839,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) return -1; } PyThreadState *tstate = PyThreadState_Get(); - if (_PyObject_GetXIData(tstate, value, item->xidata) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, value, item->xidata) != 0) { PyMem_RawFree(item->xidata); item->xidata = NULL; // The caller may want to propagate PyExc_NotShareableError diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index d69927dbcd387f..88eb41da89ee40 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -12,7 +12,8 @@ typedef _PyXIData_regitem_t dlregitem_t; // forward static void _xidregistry_init(dlregistry_t *); static void _xidregistry_fini(dlregistry_t *); -static xidatafunc _lookup_getdata_from_registry(dlcontext_t *, PyObject *); +static _PyXIData_getdata_t _lookup_getdata_from_registry( + dlcontext_t *, PyObject *); /* used in crossinterp.c */ @@ -49,7 +50,7 @@ get_lookup_context(PyThreadState *tstate, dlcontext_t *res) return 0; } -static xidatafunc +static _PyXIData_getdata_t lookup_getdata(dlcontext_t *ctx, PyObject *obj) { /* Cross-interpreter objects are looked up by exact match on the class. @@ -88,24 +89,24 @@ _PyXIData_FormatNotShareableError(PyThreadState *tstate, } -xidatafunc +_PyXIData_getdata_t _PyXIData_Lookup(PyThreadState *tstate, PyObject *obj) { dlcontext_t ctx; if (get_lookup_context(tstate, &ctx) < 0) { - return NULL; + return (_PyXIData_getdata_t){0}; } return lookup_getdata(&ctx, obj); } /***********************************************/ -/* a registry of {type -> xidatafunc} */ +/* a registry of {type -> _PyXIData_getdata_t} */ /***********************************************/ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. */ /* registry lifecycle */ @@ -200,7 +201,7 @@ _xidregistry_find_type(dlregistry_t *xidregistry, PyTypeObject *cls) return NULL; } -static xidatafunc +static _PyXIData_getdata_t _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) { PyTypeObject *cls = Py_TYPE(obj); @@ -209,10 +210,12 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) _xidregistry_lock(xidregistry); dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); - xidatafunc func = matched != NULL ? matched->getdata : NULL; + _PyXIData_getdata_t getdata = matched != NULL + ? matched->getdata + : (_PyXIData_getdata_t){0}; _xidregistry_unlock(xidregistry); - return func; + return getdata; } @@ -220,12 +223,13 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) static int _xidregistry_add_type(dlregistry_t *xidregistry, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { dlregitem_t *newhead = PyMem_RawMalloc(sizeof(dlregitem_t)); if (newhead == NULL) { return -1; } + assert((getdata.basic == NULL) != (getdata.fallback == NULL)); *newhead = (dlregitem_t){ // We do not keep a reference, to avoid keeping the class alive. .cls = cls, @@ -283,13 +287,13 @@ _xidregistry_clear(dlregistry_t *xidregistry) int _PyXIData_RegisterClass(PyThreadState *tstate, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { if (!PyType_Check(cls)) { PyErr_Format(PyExc_ValueError, "only classes may be registered"); return -1; } - if (getdata == NULL) { + if (getdata.basic == NULL && getdata.fallback == NULL) { PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); return -1; } @@ -304,7 +308,8 @@ _PyXIData_RegisterClass(PyThreadState *tstate, dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); if (matched != NULL) { - assert(matched->getdata == getdata); + assert(matched->getdata.basic == getdata.basic); + assert(matched->getdata.fallback == getdata.fallback); matched->refcount += 1; goto finally; } @@ -608,7 +613,8 @@ _tuple_shared_free(void* data) } static int -_tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) +_tuple_shared(PyThreadState *tstate, PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) { Py_ssize_t len = PyTuple_GET_SIZE(obj); if (len < 0) { @@ -636,7 +642,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) int res = -1; if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { - res = _PyObject_GetXIData(tstate, item, xidata_i); + res = _PyObject_GetXIData(tstate, item, fallback, xidata_i); _Py_LeaveRecursiveCallTstate(tstate); } if (res < 0) { @@ -737,40 +743,48 @@ _PyFunction_GetXIData(PyThreadState *tstate, PyObject *func, static void _register_builtins_for_crossinterpreter_data(dlregistry_t *xidregistry) { +#define REGISTER(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.basic=(GETDATA)})) +#define REGISTER_FALLBACK(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.fallback=(GETDATA)})) // None - if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { + if (REGISTER(Py_TYPE(Py_None), _none_shared) != 0) { Py_FatalError("could not register None for cross-interpreter sharing"); } // int - if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { + if (REGISTER(&PyLong_Type, _long_shared) != 0) { Py_FatalError("could not register int for cross-interpreter sharing"); } // bytes - if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _PyBytes_GetXIData) != 0) { + if (REGISTER(&PyBytes_Type, _PyBytes_GetXIData) != 0) { Py_FatalError("could not register bytes for cross-interpreter sharing"); } // str - if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { + if (REGISTER(&PyUnicode_Type, _str_shared) != 0) { Py_FatalError("could not register str for cross-interpreter sharing"); } // bool - if (_xidregistry_add_type(xidregistry, &PyBool_Type, _bool_shared) != 0) { + if (REGISTER(&PyBool_Type, _bool_shared) != 0) { Py_FatalError("could not register bool for cross-interpreter sharing"); } // float - if (_xidregistry_add_type(xidregistry, &PyFloat_Type, _float_shared) != 0) { + if (REGISTER(&PyFloat_Type, _float_shared) != 0) { Py_FatalError("could not register float for cross-interpreter sharing"); } // tuple - if (_xidregistry_add_type(xidregistry, &PyTuple_Type, _tuple_shared) != 0) { + if (REGISTER_FALLBACK(&PyTuple_Type, _tuple_shared) != 0) { Py_FatalError("could not register tuple for cross-interpreter sharing"); } // For now, we do not register PyCode_Type or PyFunction_Type. +#undef REGISTER +#undef REGISTER_FALLBACK } From 15a8b5b9bd58a12f28aaee71f36f7224ebee42c0 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Wed, 21 May 2025 22:52:02 +0900 Subject: [PATCH 263/989] gh-110631: Fix some incorrect indents in the documentation (#129312) --- Doc/library/functions.rst | 76 ++++++++++++------------- Doc/library/importlib.resources.abc.rst | 48 ++++++++-------- Doc/library/signal.rst | 4 +- Doc/library/socket.rst | 10 ++-- 4 files changed, 69 insertions(+), 69 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7e367a0f2b6b25..2ecce3dba5a0b9 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1154,44 +1154,44 @@ are always available. They are listed here in alphabetical order. .. function:: locals() - Return a mapping object representing the current local symbol table, with - variable names as the keys, and their currently bound references as the - values. - - At module scope, as well as when using :func:`exec` or :func:`eval` with - a single namespace, this function returns the same namespace as - :func:`globals`. - - At class scope, it returns the namespace that will be passed to the - metaclass constructor. - - When using ``exec()`` or ``eval()`` with separate local and global - arguments, it returns the local namespace passed in to the function call. - - In all of the above cases, each call to ``locals()`` in a given frame of - execution will return the *same* mapping object. Changes made through - the mapping object returned from ``locals()`` will be visible as assigned, - reassigned, or deleted local variables, and assigning, reassigning, or - deleting local variables will immediately affect the contents of the - returned mapping object. - - In an :term:`optimized scope` (including functions, generators, and - coroutines), each call to ``locals()`` instead returns a fresh dictionary - containing the current bindings of the function's local variables and any - nonlocal cell references. In this case, name binding changes made via the - returned dict are *not* written back to the corresponding local variables - or nonlocal cell references, and assigning, reassigning, or deleting local - variables and nonlocal cell references does *not* affect the contents - of previously returned dictionaries. - - Calling ``locals()`` as part of a comprehension in a function, generator, or - coroutine is equivalent to calling it in the containing scope, except that - the comprehension's initialised iteration variables will be included. In - other scopes, it behaves as if the comprehension were running as a nested - function. - - Calling ``locals()`` as part of a generator expression is equivalent to - calling it in a nested generator function. + Return a mapping object representing the current local symbol table, with + variable names as the keys, and their currently bound references as the + values. + + At module scope, as well as when using :func:`exec` or :func:`eval` with + a single namespace, this function returns the same namespace as + :func:`globals`. + + At class scope, it returns the namespace that will be passed to the + metaclass constructor. + + When using ``exec()`` or ``eval()`` with separate local and global + arguments, it returns the local namespace passed in to the function call. + + In all of the above cases, each call to ``locals()`` in a given frame of + execution will return the *same* mapping object. Changes made through + the mapping object returned from ``locals()`` will be visible as assigned, + reassigned, or deleted local variables, and assigning, reassigning, or + deleting local variables will immediately affect the contents of the + returned mapping object. + + In an :term:`optimized scope` (including functions, generators, and + coroutines), each call to ``locals()`` instead returns a fresh dictionary + containing the current bindings of the function's local variables and any + nonlocal cell references. In this case, name binding changes made via the + returned dict are *not* written back to the corresponding local variables + or nonlocal cell references, and assigning, reassigning, or deleting local + variables and nonlocal cell references does *not* affect the contents + of previously returned dictionaries. + + Calling ``locals()`` as part of a comprehension in a function, generator, or + coroutine is equivalent to calling it in the containing scope, except that + the comprehension's initialised iteration variables will be included. In + other scopes, it behaves as if the comprehension were running as a nested + function. + + Calling ``locals()`` as part of a generator expression is equivalent to + calling it in a nested generator function. .. versionchanged:: 3.12 The behaviour of ``locals()`` in a comprehension has been updated as diff --git a/Doc/library/importlib.resources.abc.rst b/Doc/library/importlib.resources.abc.rst index 7a77466bcbaf27..8253a33f591a0b 100644 --- a/Doc/library/importlib.resources.abc.rst +++ b/Doc/library/importlib.resources.abc.rst @@ -49,44 +49,44 @@ .. method:: open_resource(resource) :abstractmethod: - Returns an opened, :term:`file-like object` for binary reading - of the *resource*. + Returns an opened, :term:`file-like object` for binary reading + of the *resource*. - If the resource cannot be found, :exc:`FileNotFoundError` is - raised. + If the resource cannot be found, :exc:`FileNotFoundError` is + raised. .. method:: resource_path(resource) :abstractmethod: - Returns the file system path to the *resource*. + Returns the file system path to the *resource*. - If the resource does not concretely exist on the file system, - raise :exc:`FileNotFoundError`. + If the resource does not concretely exist on the file system, + raise :exc:`FileNotFoundError`. .. method:: is_resource(name) :abstractmethod: - Returns ``True`` if the named *name* is considered a resource. - :exc:`FileNotFoundError` is raised if *name* does not exist. + Returns ``True`` if the named *name* is considered a resource. + :exc:`FileNotFoundError` is raised if *name* does not exist. .. method:: contents() :abstractmethod: - Returns an :term:`iterable` of strings over the contents of - the package. Do note that it is not required that all names - returned by the iterator be actual resources, e.g. it is - acceptable to return names for which :meth:`is_resource` would - be false. - - Allowing non-resource names to be returned is to allow for - situations where how a package and its resources are stored - are known a priori and the non-resource names would be useful. - For instance, returning subdirectory names is allowed so that - when it is known that the package and resources are stored on - the file system then those subdirectory names can be used - directly. - - The abstract method returns an iterable of no items. + Returns an :term:`iterable` of strings over the contents of + the package. Do note that it is not required that all names + returned by the iterator be actual resources, e.g. it is + acceptable to return names for which :meth:`is_resource` would + be false. + + Allowing non-resource names to be returned is to allow for + situations where how a package and its resources are stored + are known a priori and the non-resource names would be useful. + For instance, returning subdirectory names is allowed so that + when it is known that the package and resources are stored on + the file system then those subdirectory names can be used + directly. + + The abstract method returns an iterable of no items. .. class:: Traversable diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst index c28841dbb8cfc8..b0307d3dea1170 100644 --- a/Doc/library/signal.rst +++ b/Doc/library/signal.rst @@ -211,8 +211,8 @@ The variables defined in the :mod:`signal` module are: .. data:: SIGSTKFLT - Stack fault on coprocessor. The Linux kernel does not raise this signal: it - can only be raised in user space. + Stack fault on coprocessor. The Linux kernel does not raise this signal: it + can only be raised in user space. .. availability:: Linux. diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 8fd5187e3a4a36..75fd637045d7d8 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -362,10 +362,10 @@ Exceptions Constants ^^^^^^^^^ - The AF_* and SOCK_* constants are now :class:`AddressFamily` and - :class:`SocketKind` :class:`.IntEnum` collections. +The AF_* and SOCK_* constants are now :class:`AddressFamily` and +:class:`SocketKind` :class:`.IntEnum` collections. - .. versionadded:: 3.4 +.. versionadded:: 3.4 .. data:: AF_UNIX AF_INET @@ -773,9 +773,9 @@ Constants Constant to optimize CPU locality, to be used in conjunction with :data:`SO_REUSEPORT`. - .. versionadded:: 3.11 + .. versionadded:: 3.11 - .. availability:: Linux >= 3.9 + .. availability:: Linux >= 3.9 .. data:: SO_REUSEPORT_LB From 109f7597d29f0b504e9b42cb398b44177157abaa Mon Sep 17 00:00:00 2001 From: Christian Harries <68507104+ChristianHrs@users.noreply.github.com> Date: Wed, 21 May 2025 14:59:09 +0100 Subject: [PATCH 264/989] gh-90871: fix connection backlog offset in asyncio (gh-134392) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa Co-authored-by: Kumar Aditya --- Lib/asyncio/selector_events.py | 2 +- Lib/test/test_asyncio/test_selector_events.py | 16 ++++++++++++++-- ...2025-05-20-21-45-58.gh-issue-90871.Gkvtp6.rst | 2 ++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-20-21-45-58.gh-issue-90871.Gkvtp6.rst diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 22147451fa7ebd..6ad84044adf146 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -173,7 +173,7 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. - for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: diff --git a/Lib/test/test_asyncio/test_selector_events.py b/Lib/test/test_asyncio/test_selector_events.py index de81936b7456f2..aab6a779170eb9 100644 --- a/Lib/test/test_asyncio/test_selector_events.py +++ b/Lib/test/test_asyncio/test_selector_events.py @@ -347,6 +347,18 @@ def test_process_events_write_cancelled(self): selectors.EVENT_WRITE)]) self.loop._remove_writer.assert_called_with(1) + def test_accept_connection_zero_one(self): + for backlog in [0, 1]: + sock = mock.Mock() + sock.accept.return_value = (mock.Mock(), mock.Mock()) + with self.subTest(backlog): + mock_obj = mock.patch.object + with mock_obj(self.loop, '_accept_connection2') as accept2_mock: + self.loop._accept_connection( + mock.Mock(), sock, backlog=backlog) + self.loop.run_until_complete(asyncio.sleep(0)) + self.assertEqual(sock.accept.call_count, backlog + 1) + def test_accept_connection_multiple(self): sock = mock.Mock() sock.accept.return_value = (mock.Mock(), mock.Mock()) @@ -362,7 +374,7 @@ def test_accept_connection_multiple(self): self.loop._accept_connection( mock.Mock(), sock, backlog=backlog) self.loop.run_until_complete(asyncio.sleep(0)) - self.assertEqual(sock.accept.call_count, backlog) + self.assertEqual(sock.accept.call_count, backlog + 1) def test_accept_connection_skip_connectionabortederror(self): sock = mock.Mock() @@ -388,7 +400,7 @@ def mock_sock_accept(): # as in test_accept_connection_multiple avoid task pending # warnings by using asyncio.sleep(0) self.loop.run_until_complete(asyncio.sleep(0)) - self.assertEqual(sock.accept.call_count, backlog) + self.assertEqual(sock.accept.call_count, backlog + 1) class SelectorTransportTests(test_utils.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-05-20-21-45-58.gh-issue-90871.Gkvtp6.rst b/Misc/NEWS.d/next/Library/2025-05-20-21-45-58.gh-issue-90871.Gkvtp6.rst new file mode 100644 index 00000000000000..49397c9705ecfe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-20-21-45-58.gh-issue-90871.Gkvtp6.rst @@ -0,0 +1,2 @@ +Fixed an off by one error concerning the backlog parameter in +:meth:`~asyncio.loop.create_unix_server`. Contributed by Christian Harries. From c7364f79b2fb01c251e22115875a46a2ec134dcd Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 21 May 2025 16:01:52 +0200 Subject: [PATCH 265/989] gh-127833: lexical analysis: Improve section on Names (GH-131474) Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Blaise Pabon --- Doc/reference/lexical_analysis.rst | 128 +++++++++++++++++------------ Tools/unicode/makeunicodedata.py | 2 +- 2 files changed, 77 insertions(+), 53 deletions(-) diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index 001e2547fe8031..6c4a4ea81afe29 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -288,58 +288,81 @@ forms a legal token, when read from left to right. .. _identifiers: -Identifiers and keywords -======================== +Names (identifiers and keywords) +================================ .. index:: identifier, name -Identifiers (also referred to as *names*) are described by the following lexical -definitions. +:data:`~token.NAME` tokens represent *identifiers*, *keywords*, and +*soft keywords*. -The syntax of identifiers in Python is based on the Unicode standard annex -UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for -further details. - -Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -include the uppercase and lowercase letters ``A`` through -``Z``, the underscore ``_`` and, except for the first character, the digits +Within the ASCII range (U+0001..U+007F), the valid characters for names +include the uppercase and lowercase letters (``A-Z`` and ``a-z``), +the underscore ``_`` and, except for the first character, the digits ``0`` through ``9``. -Python 3.0 introduced additional characters from outside the ASCII range (see -:pep:`3131`). For these characters, the classification uses the version of the -Unicode Character Database as included in the :mod:`unicodedata` module. -Identifiers are unlimited in length. Case is significant. +Names must contain at least one character, but have no upper length limit. +Case is significant. -.. productionlist:: python-grammar - identifier: `xid_start` `xid_continue`* - id_start: - id_continue: - xid_start: - xid_continue: - -The Unicode category codes mentioned above stand for: - -* *Lu* - uppercase letters -* *Ll* - lowercase letters -* *Lt* - titlecase letters -* *Lm* - modifier letters -* *Lo* - other letters -* *Nl* - letter numbers -* *Mn* - nonspacing marks -* *Mc* - spacing combining marks -* *Nd* - decimal numbers -* *Pc* - connector punctuations -* *Other_ID_Start* - explicit list of characters in `PropList.txt - `_ to support backwards - compatibility -* *Other_ID_Continue* - likewise - -All identifiers are converted into the normal form NFKC while parsing; comparison -of identifiers is based on NFKC. - -A non-normative HTML file listing all valid identifier characters for Unicode -16.0.0 can be found at -https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +Besides ``A-Z``, ``a-z``, ``_`` and ``0-9``, names can also use "letter-like" +and "number-like" characters from outside the ASCII range, as detailed below. + +All identifiers are converted into the `normalization form`_ NFKC while +parsing; comparison of identifiers is based on NFKC. + +Formally, the first character of a normalized identifier must belong to the +set ``id_start``, which is the union of: + +* Unicode category ```` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ```` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ```` - titlecase letters +* Unicode category ```` - modifier letters +* Unicode category ```` - other letters +* Unicode category ```` - letter numbers +* {``"_"``} - the underscore +* ```` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility + +The remaining characters must belong to the set ``id_continue``, which is the +union of: + +* all characters in ``id_start`` +* Unicode category ```` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ```` - connector punctuations +* Unicode category ```` - nonspacing marks +* Unicode category ```` - spacing combining marks +* ```` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +These sets are based on the Unicode standard annex `UAX-31`_. +See also :pep:`3131` for further details. + +Even more formally, names are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + NAME: `xid_start` `xid_continue`* + id_start: | | | | | | "_" | + id_continue: `id_start` | | | | | + xid_start: + xid_continue: + identifier: <`NAME`, except keywords> + +A non-normative listing of all valid identifier characters as defined by +Unicode is available in the `DerivedCoreProperties.txt`_ file in the Unicode +Character Database. + + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms .. _keywords: @@ -351,7 +374,7 @@ Keywords single: keyword single: reserved word -The following identifiers are used as reserved words, or *keywords* of the +The following names are used as reserved words, or *keywords* of the language, and cannot be used as ordinary identifiers. They must be spelled exactly as written here: @@ -375,18 +398,19 @@ Soft Keywords .. versionadded:: 3.10 -Some identifiers are only reserved under specific contexts. These are known as -*soft keywords*. The identifiers ``match``, ``case``, ``type`` and ``_`` can -syntactically act as keywords in certain contexts, +Some names are only reserved under specific contexts. These are known as +*soft keywords*: + +- ``match``, ``case``, and ``_``, when used in the :keyword:`match` statement. +- ``type``, when used in the :keyword:`type` statement. + +These syntactically act as keywords in their specific contexts, but this distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. -``match``, ``case``, and ``_`` are used in the :keyword:`match` statement. -``type`` is used in the :keyword:`type` statement. - .. versionchanged:: 3.12 ``type`` is now a soft keyword. diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index 889ae8fc869b8a..d4cca68c3e3e71 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -43,7 +43,7 @@ # When changing UCD version please update # * Doc/library/stdtypes.rst, and # * Doc/library/unicodedata.rst -# * Doc/reference/lexical_analysis.rst (two occurrences) +# * Doc/reference/lexical_analysis.rst (three occurrences) UNIDATA_VERSION = "16.0.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" From 4eacf3883dd041c31133ea407204b797a17559b1 Mon Sep 17 00:00:00 2001 From: Yash Vijay Date: Wed, 21 May 2025 19:39:28 +0530 Subject: [PATCH 266/989] gh-134026: Fix grammar description of for statement (GH-134034) --- Doc/reference/compound_stmts.rst | 14 +++++++------- Doc/reference/expressions.rst | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index 5d4298f70e0e14..e95fa3a6424e23 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -154,15 +154,15 @@ The :keyword:`for` statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: .. productionlist:: python-grammar - for_stmt: "for" `target_list` "in" `starred_list` ":" `suite` + for_stmt: "for" `target_list` "in" `starred_expression_list` ":" `suite` : ["else" ":" `suite`] -The ``starred_list`` expression is evaluated once; it should yield an -:term:`iterable` object. An :term:`iterator` is created for that iterable. -The first item provided -by the iterator is then assigned to the target list using the standard -rules for assignments (see :ref:`assignment`), and the suite is executed. This -repeats for each item provided by the iterator. When the iterator is exhausted, +The :token:`~python-grammar:starred_expression_list` expression is evaluated +once; it should yield an :term:`iterable` object. An :term:`iterator` is +created for that iterable. The first item provided by the iterator is then +assigned to the target list using the standard rules for assignments +(see :ref:`assignment`), and the suite is executed. This repeats for each +item provided by the iterator. When the iterator is exhausted, the suite in the :keyword:`!else` clause, if present, is executed, and the loop terminates. diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 8837344e5ddca1..2a550b504ca765 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1928,7 +1928,7 @@ Expression lists single: , (comma); expression list .. productionlist:: python-grammar - starred_expression: ["*"] `or_expr` + starred_expression: "*" `or_expr` | `expression` flexible_expression: `assignment_expression` | `starred_expression` flexible_expression_list: `flexible_expression` ("," `flexible_expression`)* [","] starred_expression_list: `starred_expression` ("," `starred_expression`)* [","] From d862b6de1bcff01229318d7138ddaab755723a43 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 21 May 2025 10:18:21 -0400 Subject: [PATCH 267/989] gh-132983: Add documentation for compression.zstd (GH-133911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for compression & compression.zstd. 🎉 --------- Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Sumana Harihareswara Co-authored-by: Martin Panter --- Doc/library/archiving.rst | 4 +- Doc/library/compression.rst | 18 + Doc/library/compression.zstd.rst | 840 +++++++++++++++++++++++++++++++ 3 files changed, 861 insertions(+), 1 deletion(-) create mode 100644 Doc/library/compression.rst create mode 100644 Doc/library/compression.zstd.rst diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst index c9284949af4972..da0b3f8c3e7693 100644 --- a/Doc/library/archiving.rst +++ b/Doc/library/archiving.rst @@ -5,13 +5,15 @@ Data Compression and Archiving ****************************** The modules described in this chapter support data compression with the zlib, -gzip, bzip2 and lzma algorithms, and the creation of ZIP- and tar-format +gzip, bzip2, lzma, and zstd algorithms, and the creation of ZIP- and tar-format archives. See also :ref:`archiving-operations` provided by the :mod:`shutil` module. .. toctree:: + compression.rst + compression.zstd.rst zlib.rst gzip.rst bz2.rst diff --git a/Doc/library/compression.rst b/Doc/library/compression.rst new file mode 100644 index 00000000000000..618b4a3c2bd170 --- /dev/null +++ b/Doc/library/compression.rst @@ -0,0 +1,18 @@ +The :mod:`!compression` package +=============================== + +.. versionadded:: 3.14 + +The :mod:`!compression` package contains the canonical compression modules +containing interfaces to several different compression algorithms. Some of +these modules have historically been available as separate modules; those will +continue to be available under their original names for compatibility reasons, +and will not be removed without a deprecation cycle. The use of modules in +:mod:`!compression` is encouraged where practical. + +* :mod:`!compression.bz2` -- Re-exports :mod:`bz2` +* :mod:`!compression.gzip` -- Re-exports :mod:`gzip` +* :mod:`!compression.lzma` -- Re-exports :mod:`lzma` +* :mod:`!compression.zlib` -- Re-exports :mod:`zlib` +* :mod:`compression.zstd` -- Wrapper for the Zstandard compression library + diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst new file mode 100644 index 00000000000000..1e1802155a19ec --- /dev/null +++ b/Doc/library/compression.zstd.rst @@ -0,0 +1,840 @@ +:mod:`!compression.zstd` --- Compression compatible with the Zstandard format +============================================================================= + +.. module:: compression.zstd + :synopsis: Low-level interface to compression and decompression routines in + the zstd library. + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/compression/zstd/__init__.py` + +-------------- + +This module provides classes and functions for compressing and decompressing +data using the Zstandard (or *zstd*) compression algorithm. The +`zstd manual `__ +describes Zstandard as "a fast lossless compression algorithm, targeting +real-time compression scenarios at zlib-level and better compression ratios." +Also included is a file interface that supports reading and writing the +contents of ``.zst`` files created by the :program:`zstd` utility, as well as +raw zstd compressed streams. + +The :mod:`!compression.zstd` module contains: + +* The :func:`.open` function and :class:`ZstdFile` class for reading and + writing compressed files. +* The :class:`ZstdCompressor` and :class:`ZstdDecompressor` classes for + incremental (de)compression. +* The :func:`compress` and :func:`decompress` functions for one-shot + (de)compression. +* The :func:`train_dict` and :func:`finalize_dict` functions and the + :class:`ZstdDict` class to train and manage Zstandard dictionaries. +* The :class:`CompressionParameter`, :class:`DecompressionParameter`, and + :class:`Strategy` classes for setting advanced (de)compression parameters. + + +Exceptions +---------- + +.. exception:: ZstdError + + This exception is raised when an error occurs during compression or + decompression, or while initializing the (de)compressor state. + + +Reading and writing compressed files +------------------------------------ + +.. function:: open(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None, encoding=None, errors=None, newline=None) + + Open a Zstandard-compressed file in binary or text mode, returning a + :term:`file object`. + + The *file* argument can be either a file name (given as a + :class:`str`, :class:`bytes` or :term:`path-like ` + object), in which case the named file is opened, or it can be an existing + file object to read from or write to. + + The mode argument can be either ``'rb'`` for reading (default), ``'wb'`` for + overwriting, ``'ab'`` for appending, or ``'xb'`` for exclusive creation. + These can equivalently be given as ``'r'``, ``'w'``, ``'a'``, and ``'x'`` + respectively. You may also open in text mode with ``'rt'``, ``'wt'``, + ``'at'``, and ``'xt'`` respectively. + + When reading, the *options* argument can be a dictionary providing advanced + decompression parameters; see :class:`DecompressionParameter` for detailed + information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be non-None. + The *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + In binary mode, this function is equivalent to the :class:`ZstdFile` + constructor: ``ZstdFile(file, mode, ...)``. In this case, the + *encoding*, *errors*, and *newline* parameters must not be provided. + + In text mode, a :class:`ZstdFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line endings. + + +.. class:: ZstdFile(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None) + + Open a Zstandard-compressed file in binary mode. + + A :class:`ZstdFile` can wrap an already-open :term:`file object`, or operate + directly on a named file. The *file* argument specifies either the file + object to wrap, or the name of the file to open (as a :class:`str`, + :class:`bytes` or :term:`path-like ` object). If + wrapping an existing file object, the wrapped file will not be closed when + the :class:`ZstdFile` is closed. + + The *mode* argument can be either ``'rb'`` for reading (default), ``'wb'`` + for overwriting, ``'xb'`` for exclusive creation, or ``'ab'`` for appending. + These can equivalently be given as ``'r'``, ``'w'``, ``'x'`` and ``'a'`` + respectively. + + If *file* is a file object (rather than an actual file name), a mode of + ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``. + + When reading, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`DecompressionParameter` for detailed information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be passed. The + *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + :class:`!ZstdFile` supports all the members specified by + :class:`io.BufferedIOBase`, except for :meth:`~io.BufferedIOBase.detach` + and :meth:`~io.IOBase.truncate`. + Iteration and the :keyword:`with` statement are supported. + + The following method and attributes are also provided: + + .. method:: peek(size=-1) + + Return buffered data without advancing the file position. At least one + byte of data will be returned, unless EOF has been reached. The exact + number of bytes returned is unspecified (the *size* argument is ignored). + + .. note:: While calling :meth:`peek` does not change the file position of + the :class:`ZstdFile`, it may change the position of the underlying + file object (for example, if the :class:`ZstdFile` was constructed by + passing a file object for *file*). + + .. attribute:: mode + + ``'rb'`` for reading and ``'wb'`` for writing. + + .. attribute:: name + + The name of the Zstandard file. Equivalent to the :attr:`~io.FileIO.name` + attribute of the underlying :term:`file object`. + + +Compressing and decompressing data in memory +-------------------------------------------- + +.. function:: compress(data, level=None, options=None, zstd_dict=None) + + Compress *data* (a :term:`bytes-like object`), returning the compressed + data as a :class:`bytes` object. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + +.. function:: decompress(data, zstd_dict=None, options=None) + + Decompress *data* (a :term:`bytes-like object`), returning the uncompressed + data as a :class:`bytes` object. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + If *data* is the concatenation of multiple distinct compressed frames, + decompress all of these frames, and return the concatenation of the results. + + +.. class:: ZstdCompressor(level=None, options=None, zstd_dict=None) + + Create a compressor object, which can be used to compress data + incrementally. + + For a more convenient way of compressing a single chunk of data, see the + module-level function :func:`compress`. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an optional instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + + .. method:: compress(data, mode=ZstdCompressor.CONTINUE) + + Compress *data* (a :term:`bytes-like object`), returning a :class:`bytes` + object with compressed data if possible, or otherwise an empty + :class:`!bytes` object. Some of *data* may be buffered internally, for + use in later calls to :meth:`!compress` and :meth:`~.flush`. The returned + data should be concatenated with the output of any previous calls to + :meth:`~.compress`. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, + or :attr:`~.FLUSH_FRAME`. + + When all data has been provided to the compressor, call the + :meth:`~.flush` method to finish the compression process. If + :meth:`~.compress` is called with *mode* set to :attr:`~.FLUSH_FRAME`, + :meth:`~.flush` should not be called, as it would write out a new empty + frame. + + .. method:: flush(mode=ZstdCompressor.FLUSH_FRAME) + + Finish the compression process, returning a :class:`bytes` object + containing any data stored in the compressor's internal buffers. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.FLUSH_BLOCK`, or :attr:`~.FLUSH_FRAME`. + + .. attribute:: CONTINUE + + Collect more data for compression, which may or may not generate output + immediately. This mode optimizes the compression ratio by maximizing the + amount of data per block and frame. + + .. attribute:: FLUSH_BLOCK + + Complete and write a block to the data stream. The data returned so far + can be immediately decompressed. Past data can still be referenced in + future blocks generated by calls to :meth:`~.compress`, + improving compression. + + .. attribute:: FLUSH_FRAME + + Complete and write out a frame. Future data provided to + :meth:`~.compress` will be written into a new frame and + *cannot* reference past data. + + +.. class:: ZstdDecompressor(zstd_dict=None, options=None) + + Create a decompressor object, which can be used to decompress data + incrementally. + + For a more convenient way of decompressing an entire compressed stream at + once, see the module-level function :func:`decompress`. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + .. note:: + This class does not transparently handle inputs containing multiple + compressed frames, unlike the :func:`decompress` function and + :class:`ZstdFile` class. To decompress a multi-frame input, you should + use :func:`decompress`, :class:`ZstdFile` if working with a + :term:`file object`, or multiple :class:`!ZstdDecompressor` instances. + + .. method:: decompress(data, max_length=-1) + + Decompress *data* (a :term:`bytes-like object`), returning + uncompressed data as bytes. Some of *data* may be buffered + internally, for use in later calls to :meth:`!decompress`. + The returned data should be concatenated with the output of any previous + calls to :meth:`!decompress`. + + If *max_length* is non-negative, the method returns at most *max_length* + bytes of decompressed data. If this limit is reached and further + output can be produced, the :attr:`~.needs_input` attribute will + be set to ``False``. In this case, the next call to + :meth:`~.decompress` may provide *data* as ``b''`` to obtain + more of the output. + + If all of the input data was decompressed and returned (either + because this was less than *max_length* bytes, or because + *max_length* was negative), the :attr:`~.needs_input` attribute + will be set to ``True``. + + Attempting to decompress data after the end of a frame will raise a + :exc:`ZstdError`. Any data found after the end of the frame is ignored + and saved in the :attr:`~.unused_data` attribute. + + .. attribute:: eof + + ``True`` if the end-of-stream marker has been reached. + + .. attribute:: unused_data + + Data found after the end of the compressed stream. + + Before the end of the stream is reached, this will be ``b''``. + + .. attribute:: needs_input + + ``False`` if the :meth:`.decompress` method can provide more + decompressed data before requiring new compressed input. + + +Zstandard dictionaries +---------------------- + + +.. function:: train_dict(samples, dict_size) + + Train a Zstandard dictionary, returning a :class:`ZstdDict` instance. + Zstandard dictionaries enable more efficient compression of smaller sizes + of data, which is traditionally difficult to compress due to less + repetition. If you are compressing multiple similar groups of data (such as + similar files), Zstandard dictionaries can improve compression ratios and + speed significantly. + + The *samples* argument (an iterable of :class:`bytes` objects), is the + population of samples used to train the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. The Zstandard documentation suggests an + absolute maximum of no more than 100 KB, but the maximum can often be smaller + depending on the data. Larger dictionaries generally slow down compression, + but improve compression ratios. Smaller dictionaries lead to faster + compression, but reduce the compression ratio. + + +.. function:: finalize_dict(zstd_dict, /, samples, dict_size, level) + + An advanced function for converting a "raw content" Zstandard dictionary into + a regular Zstandard dictionary. "Raw content" dictionaries are a sequence of + bytes that do not need to follow the structure of a normal Zstandard + dictionary. + + The *zstd_dict* argument is a :class:`ZstdDict` instance with + the :attr:`~ZstdDict.dict_content` containing the raw dictionary contents. + + The *samples* argument (an iterable of :class:`bytes` objects), contains + sample data for generating the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. See :func:`train_dict` for + suggestions on the maximum dictionary size. + + The *level* argument (an integer) is the compression level expected to be + passed to the compressors using this dictionary. The dictionary information + varies for each compression level, so tuning for the proper compression + level can make compression more efficient. + + +.. class:: ZstdDict(dict_content, /, *, is_raw=False) + + A wrapper around Zstandard dictionaries. Dictionaries can be used to improve + the compression of many small chunks of data. Use :func:`train_dict` if you + need to train a new dictionary from sample data. + + The *dict_content* argument (a :term:`bytes-like object`), is the already + trained dictionary information. + + The *is_raw* argument, a boolean, is an advanced parameter controlling the + meaning of *dict_content*. ``True`` means *dict_content* is a "raw content" + dictionary, without any format restrictions. ``False`` means *dict_content* + is an ordinary Zstandard dictionary, created from Zstandard functions, + for example, :func:`train_dict` or the external :program:`zstd` CLI. + + When passing a :class:`!ZstdDict` to a function, the + :attr:`!as_digested_dict` and :attr:`!as_undigested_dict` attributes can + control how the dictionary is loaded by passing them as the ``zstd_dict`` + argument, for example, ``compress(data, zstd_dict=zd.as_digested_dict)``. + Digesting a dictionary is a costly operation that occurs when loading a + Zstandard dictionary. When making multiple calls to compression or + decompression, passing a digested dictionary will reduce the overhead of + loading the dictionary. + + .. list-table:: Difference for compression + :widths: 10 14 10 + :header-rows: 1 + + * - + - Digested dictionary + - Undigested dictionary + * - Advanced parameters of the compressor which may be overridden by + the dictionary's parameters + - ``window_log``, ``hash_log``, ``chain_log``, ``search_log``, + ``min_match``, ``target_length``, ``strategy``, + ``enable_long_distance_matching``, ``ldm_hash_log``, + ``ldm_min_match``, ``ldm_bucket_size_log``, ``ldm_hash_rate_log``, + and some non-public parameters. + - None + * - :class:`!ZstdDict` internally caches the dictionary + - Yes. It's faster when loading a digested dictionary again with the + same compression level. + - No. If you wish to load an undigested dictionary multiple times, + consider reusing a compressor object. + + If passing a :class:`!ZstdDict` without any attribute, an undigested + dictionary is passed by default when compressing and a digested dictionary + is generated if necessary and passed by default when decompressing. + + .. attribute:: dict_content + + The content of the Zstandard dictionary, a ``bytes`` object. It's the + same as the *dict_content* argument in the ``__init__`` method. It can + be used with other programs, such as the ``zstd`` CLI program. + + .. attribute:: dict_id + + Identifier of the Zstandard dictionary, a non-negative int value. + + Non-zero means the dictionary is ordinary, created by Zstandard + functions and following the Zstandard format. + + ``0`` means a "raw content" dictionary, free of any format restriction, + used for advanced users. + + .. note:: + + The meaning of ``0`` for :attr:`!ZstdDict.dict_id` is different + from the ``dictionary_id`` attribute to the :func:`get_frame_info` + function. + + .. attribute:: as_digested_dict + + Load as a digested dictionary. + + .. attribute:: as_undigested_dict + + Load as an undigested dictionary. + + +Advanced parameter control +-------------------------- + +.. class:: CompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced compression parameter + keys that can be used when compressing data. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Parameters are optional; any omitted parameter will have it's value selected + automatically. + + Example getting the lower and upper bound of :attr:`~.compression_level`:: + + lower, upper = CompressionParameter.compression_level.bounds() + + Example setting the :attr:`~.window_log` to the maximum size:: + + _lower, upper = CompressionParameter.window_log.bounds() + options = {CompressionParameter.window_log: upper} + compress(b'venezuelan beaver cheese', options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a compression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. For example, to get the valid values for + :attr:`~.compression_level`, one may check the result of + ``CompressionParameter.compression_level.bounds()``. + + Both the lower and upper bounds are inclusive. + + .. attribute:: compression_level + + A high-level means of setting other compression parameters that affect + the speed and ratio of compressing data. Setting the level to zero uses + :attr:`COMPRESSION_LEVEL_DEFAULT`. + + .. attribute:: window_log + + Maximum allowed back-reference distance the compressor can use when + compressing data, expressed as power of two, ``1 << window_log`` bytes. + This parameter greatly influences the memory usage of compression. Higher + values require more memory but gain better compression values. + + A value of zero causes the value to be selected automatically. + + .. attribute:: hash_log + + Size of the initial probe table, as a power of two. The resulting memory + usage is ``1 << (hash_log+2)`` bytes. Larger tables improve compression + ratio of strategies <= :attr:`~Strategy.dfast`, and improve compression + speed of strategies > :attr:`~Strategy.dfast`. + + A value of zero causes the value to be selected automatically. + + .. attribute:: chain_log + + Size of the multi-probe search table, as a power of two. The resulting + memory usage is ``1 << (chain_log+2)`` bytes. Larger tables result in + better and slower compression. This parameter has no effect for the + :attr:`~Strategy.fast` strategy. It's still useful when using + :attr:`~Strategy.dfast` strategy, in which case it defines a secondary + probe table. + + A value of zero causes the value to be selected automatically. + + .. attribute:: search_log + + Number of search attempts, as a power of two. More attempts result in + better and slower compression. This parameter is useless for + :attr:`~Strategy.fast` and :attr:`~Strategy.dfast` strategies. + + A value of zero causes the value to be selected automatically. + + .. attribute:: min_match + + Minimum size of searched matches. Larger values increase compression and + decompression speed, but decrease ratio. Note that Zstandard can still + find matches of smaller size, it just tweaks its search algorithm to look + for this size and larger. For all strategies < :attr:`~Strategy.btopt`, + the effective minimum is ``4``; for all strategies + > :attr:`~Strategy.fast`, the effective maximum is ``6``. + + A value of zero causes the value to be selected automatically. + + .. attribute:: target_length + + The impact of this field depends on the selected :class:`Strategy`. + + For strategies :attr:`~Strategy.btopt`, :attr:`~Strategy.btultra` and + :attr:`~Strategy.btultra2`, the value is the length of a match + considered "good enough" to stop searching. Larger values make + compression ratios better, but compresses slower. + + For strategy :attr:`~Strategy.fast`, it is the distance between match + sampling. Larger values make compression faster, but with a worse + compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: strategy + + The higher the value of selected strategy, the more complex the + compression technique used by zstd, resulting in higher compression + ratios but slower compression. + + .. seealso:: :class:`Strategy` + + .. attribute:: enable_long_distance_matching + + Long distance matching can be used to improve compression for large + inputs by finding large matches at greater distances. It increases memory + usage and window size. + + ``True`` or ``1`` enable long distance matching while ``False`` or ``0`` + disable it. + + Enabling this parameter increases default + :attr:`~CompressionParameter.window_log` to 128 MiB except when expressly + set to a different value. This setting is enabled by default if + :attr:`!window_log` >= 128 MiB and the compression + strategy >= :attr:`~Strategy.btopt` (compression level 16+). + + .. attribute:: ldm_hash_log + + Size of the table for long distance matching, as a power of two. Larger + values increase memory usage and compression ratio, but decrease + compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_min_match + + Minimum match size for long distance matcher. Larger or too small values + can often decrease the compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_bucket_size_log + + Log size of each bucket in the long distance matcher hash table for + collision resolution. Larger values improve collision resolution but + decrease compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_hash_rate_log + + Frequency of inserting/looking up entries into the long distance matcher + hash table. Larger values improve compression speed. Deviating far from + the default value will likely result in a compression ratio decrease. + + A value of zero causes the value to be selected automatically. + + .. attribute:: checksum_flag + + A four-byte checksum using XXHash64 of the uncompressed content is + written at the end of each frame. Zstandard's decompression code verifies + the checksum. If there is a mismatch a :class:`ZstdError` exception is + raised. + + ``True`` or ``1`` enable checksum generation while ``False`` or ``0`` + disable it. + + .. attribute:: dict_id_flag + + When compressing with a :class:`ZstdDict`, the dictionary's ID is written + into the frame header. + + ``True`` or ``1`` enable storing the dictionary ID while ``False`` or + ``0`` disable it. + + .. attribute:: nb_workers + + Select how many threads will be spawned to compress in parallel. When + :attr:`!nb_workers` > 0, enables multi-threaded compression, a value of + ``1`` means "one-thread multi-threaded mode". More workers improve speed, + but also increase memory usage and slightly reduce compression ratio. + + A value of zero disables multi-threading. + + .. attribute:: job_size + + Size of a compression job, in bytes. This value is enforced only when + :attr:`~CompressionParameter.nb_workers` >= 1. Each compression job is + completed in parallel, so this value can indirectly impact the number of + active threads. + + A value of zero causes the value to be selected automatically. + + .. attribute:: overlap_log + + Sets how much data is reloaded from previous jobs (threads) for new jobs + to be used by the look behind window during compression. This value is + only used when :attr:`~CompressionParameter.nb_workers` >= 1. Acceptable + values vary from 0 to 9. + + * 0 means dynamically set the overlap amount + * 1 means no overlap + * 9 means use a full window size from the previous job + + Each increment halves/doubles the overlap size. "8" means an overlap of + ``window_size/2``, "7" means an overlap of ``window_size/4``, etc. + +.. class:: DecompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced decompression parameter + keys that can be used when decompressing data. Parameters are optional; any + omitted parameter will have it's value selected automatically. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Example setting the :attr:`~.window_log_max` to the maximum size:: + + data = compress(b'Some very long buffer of bytes...') + + _lower, upper = DecompressionParameter.window_log_max.bounds() + + options = {DecompressionParameter.window_log_max: upper} + decompress(data, options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a decompression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. + + Both the lower and upper bounds are inclusive. + + .. attribute:: window_log_max + + The base-two logarithm of the maximum size of the window used during + decompression. This can be useful to limit the amount of memory used when + decompressing data. A larger maximum window size leads to faster + decompression. + + A value of zero causes the value to be selected automatically. + + +.. class:: Strategy() + + An :class:`~enum.IntEnum` containing strategies for compression. + Higher-numbered strategies correspond to more complex and slower + compression. + + .. note:: + + The values of attributes of :class:`!Strategy` are not necessarily stable + across zstd versions. Only the ordering of the attributes may be relied + upon. The attributes are listed below in order. + + The following strategies are available: + + .. attribute:: fast + + .. attribute:: dfast + + .. attribute:: greedy + + .. attribute:: lazy + + .. attribute:: lazy2 + + .. attribute:: btlazy2 + + .. attribute:: btopt + + .. attribute:: btultra + + .. attribute:: btultra2 + + +Miscellaneous +------------- + +.. function:: get_frame_info(frame_buffer) + + Retrieve a :class:`FrameInfo` object containing metadata about a Zstandard + frame. Frames contain metadata related to the compressed data they hold. + + +.. class:: FrameInfo + + Metadata related to a Zstandard frame. + + .. attribute:: decompressed_size + + The size of the decompressed contents of the frame. + + .. attribute:: dictionary_id + + An integer representing the Zstandard dictionary ID needed for + decompressing the frame. ``0`` means the dictionary ID was not + recorded in the frame header. This may mean that a Zstandard dictionary + is not needed, or that the ID of a required dictionary was not recorded. + + +.. attribute:: COMPRESSION_LEVEL_DEFAULT + + The default compression level for Zstandard: ``3``. + + +.. attribute:: zstd_version_info + + Version number of the runtime zstd library as a tuple of integers + (major, minor, release). + + +Examples +-------- + +Reading in a compressed file: + +.. code-block:: python + + from compression import zstd + + with zstd.open("file.zst") as f: + file_content = f.read() + +Creating a compressed file: + +.. code-block:: python + + from compression import zstd + + data = b"Insert Data Here" + with zstd.open("file.zst", "w") as f: + f.write(data) + +Compressing data in memory: + +.. code-block:: python + + from compression import zstd + + data_in = b"Insert Data Here" + data_out = zstd.compress(data_in) + +Incremental compression: + +.. code-block:: python + + from compression import zstd + + comp = zstd.ZstdCompressor() + out1 = comp.compress(b"Some data\n") + out2 = comp.compress(b"Another piece of data\n") + out3 = comp.compress(b"Even more data\n") + out4 = comp.flush() + # Concatenate all the partial results: + result = b"".join([out1, out2, out3, out4]) + +Writing compressed data to an already-open file: + +.. code-block:: python + + from compression import zstd + + with open("myfile", "wb") as f: + f.write(b"This data will not be compressed\n") + with zstd.open(f, "w") as zstf: + zstf.write(b"This *will* be compressed\n") + f.write(b"Not compressed\n") + +Creating a compressed file using compression parameters: + +.. code-block:: python + + from compression import zstd + + options = { + zstd.CompressionParameter.checksum_flag: 1 + } + with zstd.open("file.zst", "w", options=options) as f: + f.write(b"Mind if I squeeze in?") From b529b60fc239d19245e5fafd0514d90097c2eb40 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 10:21:57 -0400 Subject: [PATCH 268/989] gh-132246: Add special buffer methods to C API Type Object docs (gh-132247) Two special methods, __buffer__ and __release_buffer__ were added to Python 3.12 by PEP 688. The C API Type Object documentation for slots includes `tp_as_buffer`, and sub-slots `bf_getbuffer`, `bf_releasebuffer` but does not refer to the Python Data Model version of those. Add the missing references. --- Doc/c-api/typeobj.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 64353a8daca5f6..5df0c0fe608e53 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -79,7 +79,7 @@ Quick Reference | :c:member:`~PyTypeObject.tp_setattro` | :c:type:`setattrofunc` | __setattr__, | X | X | | G | | | | __delattr__ | | | | | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ - | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | | | | | % | + | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | :ref:`sub-slots` | | | | % | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ | :c:member:`~PyTypeObject.tp_flags` | unsigned long | | X | X | | ? | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ @@ -325,9 +325,10 @@ sub-slots +---------------------------------------------------------+-----------------------------------+---------------+ | | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | | + | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | __buffer__ | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | | + | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | __release_\ | + | | | buffer\__ | +---------------------------------------------------------+-----------------------------------+---------------+ .. _slot-typedefs-table: From 84d5f8d799dbbf86248375b6edbbcf4a022788c4 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 10:40:50 -0400 Subject: [PATCH 269/989] gh-133982: Update test_bufio to use self.open (gh-133983) --- Lib/test/test_bufio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_bufio.py b/Lib/test/test_bufio.py index dc9a82dc635318..cb9cb4d0bc7e9c 100644 --- a/Lib/test/test_bufio.py +++ b/Lib/test/test_bufio.py @@ -28,7 +28,7 @@ def try_one(self, s): f.write(b"\n") f.write(s) f.close() - f = open(os_helper.TESTFN, "rb") + f = self.open(os_helper.TESTFN, "rb") line = f.readline() self.assertEqual(line, s + b"\n") line = f.readline() From 06eaf4055c1d7359e129efb65b94f34d2ec51a57 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 10:45:00 -0400 Subject: [PATCH 270/989] gh-71253: Match _io exception in _pyio (gh-133985) Test was only testing _io, expanded to cover _pyio. Co-authored-by: Peter Bierma --- Lib/_pyio.py | 3 ++- Lib/test/test_io.py | 4 ++-- .../Library/2025-05-13-18-21-59.gh-issue-71253.-3Sf_K.rst | 3 +++ 3 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-13-18-21-59.gh-issue-71253.-3Sf_K.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index a870de5b532542..f79674fb7a9f5e 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1563,7 +1563,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 90680c6d47ab41..4625e3a01faa7b 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -918,7 +918,7 @@ def test_bad_opener_negative_1(self): def badopener(fname, flags): return -1 with self.assertRaises(ValueError) as cm: - open('non-existent', 'r', opener=badopener) + self.open('non-existent', 'r', opener=badopener) self.assertEqual(str(cm.exception), 'opener returned -1') def test_bad_opener_other_negative(self): @@ -926,7 +926,7 @@ def test_bad_opener_other_negative(self): def badopener(fname, flags): return -2 with self.assertRaises(ValueError) as cm: - open('non-existent', 'r', opener=badopener) + self.open('non-existent', 'r', opener=badopener) self.assertEqual(str(cm.exception), 'opener returned -2') def test_opener_invalid_fd(self): diff --git a/Misc/NEWS.d/next/Library/2025-05-13-18-21-59.gh-issue-71253.-3Sf_K.rst b/Misc/NEWS.d/next/Library/2025-05-13-18-21-59.gh-issue-71253.-3Sf_K.rst new file mode 100644 index 00000000000000..714d707f488709 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-13-18-21-59.gh-issue-71253.-3Sf_K.rst @@ -0,0 +1,3 @@ +Raise :exc:`ValueError` in :func:`open` if *opener* returns a negative +file-descriptor in the Python implementation of :mod:`io` to match the +C implementation. From 5b0e82752120a5dc66ce6ee778751d71ba2c33b2 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 10:51:56 -0400 Subject: [PATCH 271/989] gh-133982: Run unclosed file test on all io implementations (gh-134165) Update `test_io` `_check_warn_on_dealloc` to use `self.` to dispatch to different I/O implementations. Update the `_pyio` implementation to match expected behavior, using the same `_dealloc_warn` design as the C implementation uses to report the topmost `__del__` object. The FileIO one now matches all the others, so can use IOBase. There was a missing check on closing (self._fd must be valid), add that check --- Lib/_pyio.py | 18 ++++++++++++++---- Lib/test/test_io.py | 4 ++-- ...5-05-17-20-23-57.gh-issue-133982.smS7au.rst | 3 +++ 3 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-17-20-23-57.gh-issue-133982.smS7au.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index f79674fb7a9f5e..300d5b3cbce32b 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -407,6 +407,9 @@ def __del__(self): if closed: return + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + # If close() fails, the caller logs the exception with # sys.unraisablehook. close() must be called at the end at __del__(). self.close() @@ -853,6 +856,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1601,12 +1608,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1781,7 +1787,7 @@ def close(self): if not self.closed: self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -2690,6 +2696,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 4625e3a01faa7b..aa619a96ab7535 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4417,7 +4417,7 @@ def test_abc_inheritance_official(self): self._check_abc_inheritance(io) def _check_warn_on_dealloc(self, *args, **kwargs): - f = open(*args, **kwargs) + f = self.open(*args, **kwargs) r = repr(f) with self.assertWarns(ResourceWarning) as cm: f = None @@ -4446,7 +4446,7 @@ def cleanup_fds(): r, w = os.pipe() fds += r, w with warnings_helper.check_no_resource_warning(self): - open(r, *args, closefd=False, **kwargs) + self.open(r, *args, closefd=False, **kwargs) @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_warn_on_dealloc_fd(self): diff --git a/Misc/NEWS.d/next/Library/2025-05-17-20-23-57.gh-issue-133982.smS7au.rst b/Misc/NEWS.d/next/Library/2025-05-17-20-23-57.gh-issue-133982.smS7au.rst new file mode 100644 index 00000000000000..a6753145981181 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-17-20-23-57.gh-issue-133982.smS7au.rst @@ -0,0 +1,3 @@ +Emit :exc:`RuntimeWarning` in the Python implementation of :mod:`io` when +the :term:`file-like object ` is not closed explicitly in the +presence of multiple I/O layers. From 0a68068bd2a5bff98998067a141b17af5be9b750 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 11:29:18 -0400 Subject: [PATCH 272/989] gh-62184: Remove _pyio import of _io.FileIO (gh-134192) This was added in the add of `_io`, isn't used since bpo-21859 when a `_pyio` implementation was added which defines `FileIO` lower down in the file. --- Lib/_pyio.py | 2 -- .../next/Library/2025-05-18-12-48-39.gh-issue-62184.y11l10.rst | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-12-48-39.gh-issue-62184.y11l10.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 300d5b3cbce32b..fb2a6d049caab6 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -648,8 +648,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): diff --git a/Misc/NEWS.d/next/Library/2025-05-18-12-48-39.gh-issue-62184.y11l10.rst b/Misc/NEWS.d/next/Library/2025-05-18-12-48-39.gh-issue-62184.y11l10.rst new file mode 100644 index 00000000000000..7bc994e57fb58f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-12-48-39.gh-issue-62184.y11l10.rst @@ -0,0 +1,2 @@ +Remove import of C implementation of :class:`io.FileIO` from Python +implementation which has its own implementation From c64a21454b3c139af9e88941a286885fc4828a7e Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 21 May 2025 11:53:13 -0400 Subject: [PATCH 273/989] gh-132983: Refactor shared code in train_dict and finalize_dict (GH-134432) Refactor shared code in train_dict and finalize_dict --- Modules/_zstd/_zstdmodule.c | 123 ++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 68 deletions(-) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 0294828aa106ea..b2e4f95b906356 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -172,6 +172,49 @@ get_zstd_state(PyObject *module) return (_zstd_state *)state; } +static Py_ssize_t +calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, + size_t **chunk_sizes) +{ + Py_ssize_t chunks_number; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + chunks_number = Py_SIZE(samples_sizes); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return -1; + } + + /* Prepare chunk_sizes */ + *chunk_sizes = PyMem_New(size_t, chunks_number); + if (*chunk_sizes == NULL) { + PyErr_NoMemory(); + return -1; + } + + sizes_sum = 0; + for (i = 0; i < chunks_number; i++) { + PyObject *size = PyTuple_GetItem(samples_sizes, i); + (*chunk_sizes)[i] = PyLong_AsSize_t(size); + if ((*chunk_sizes)[i] == (size_t)-1 && PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "Items in samples_sizes should be an int " + "object, with a value between 0 and %u.", SIZE_MAX); + return -1; + } + sizes_sum += (*chunk_sizes)[i]; + } + + if (sizes_sum != Py_SIZE(samples_bytes)) { + PyErr_SetString(PyExc_ValueError, + "The samples size tuple doesn't match the concatenation's size."); + return -1; + } + return chunks_number; +} + /*[clinic input] _zstd.train_dict @@ -192,14 +235,10 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, PyObject *samples_sizes, Py_ssize_t dict_size) /*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ { - // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict - // are pretty similar. We should see if we can refactor them to share that code. - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; + size_t *chunk_sizes = NULL; + Py_ssize_t chunks_number; size_t zstd_ret; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { @@ -207,39 +246,14 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, return NULL; } - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) + { return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); - goto error; - } - /* Allocate dict buffer */ dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); if (dst_dict_bytes == NULL) { @@ -307,8 +321,6 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, PyObject *dst_dict_bytes = NULL; size_t zstd_ret; ZDICT_params_t params; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { @@ -316,39 +328,14 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, return NULL; } - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) + { return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); - goto error; - } - /* Allocate dict buffer */ dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); if (dst_dict_bytes == NULL) { From e1f891414b2329414a6160ed246f5f869a218bfd Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 21 May 2025 12:06:40 -0400 Subject: [PATCH 274/989] gh-80050: Update BufferedReader.read docs around non-blocking (GH-130653) --- Doc/library/io.rst | 85 ++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 08c76da3d8c00a..de5cab5aee649f 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -528,14 +528,13 @@ I/O Base Classes It inherits from :class:`IOBase`. The main difference with :class:`RawIOBase` is that methods :meth:`read`, - :meth:`readinto` and :meth:`write` will try (respectively) to read as much - input as requested or to consume all given output, at the expense of - making perhaps more than one system call. + :meth:`readinto` and :meth:`write` will try (respectively) to read + as much input as requested or to emit all provided data. - In addition, those methods can raise :exc:`BlockingIOError` if the - underlying raw stream is in non-blocking mode and cannot take or give - enough data; unlike their :class:`RawIOBase` counterparts, they will - never return ``None``. + In addition, if the underlying raw stream is in non-blocking mode, when the + system returns would block :meth:`write` will raise :exc:`BlockingIOError` + with :attr:`BlockingIOError.characters_written` and :meth:`read` will return + data read so far or ``None`` if no data is available. Besides, the :meth:`read` method does not have a default implementation that defers to :meth:`readinto`. @@ -568,29 +567,40 @@ I/O Base Classes .. method:: read(size=-1, /) - Read and return up to *size* bytes. If the argument is omitted, ``None``, - or negative, data is read and returned until EOF is reached. An empty - :class:`bytes` object is returned if the stream is already at EOF. + Read and return up to *size* bytes. If the argument is omitted, ``None``, + or negative read as much as possible. - If the argument is positive, and the underlying raw stream is not - interactive, multiple raw reads may be issued to satisfy the byte count - (unless EOF is reached first). But for interactive raw streams, at most - one raw read will be issued, and a short result does not imply that EOF is - imminent. + Fewer bytes may be returned than requested. An empty :class:`bytes` object + is returned if the stream is already at EOF. More than one read may be + made and calls may be retried if specific errors are encountered, see + :meth:`os.read` and :pep:`475` for more details. Less than size bytes + being returned does not imply that EOF is imminent. - A :exc:`BlockingIOError` is raised if the underlying raw stream is in - non blocking-mode, and has no data available at the moment. + When reading as much as possible the default implementation will use + ``raw.readall`` if available (which should implement + :meth:`RawIOBase.readall`), otherwise will read in a loop until read + returns ``None``, an empty :class:`bytes`, or a non-retryable error. For + most streams this is to EOF, but for non-blocking streams more data may + become available. + + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: read1(size=-1, /) - Read and return up to *size* bytes, with at most one call to the - underlying raw stream's :meth:`~RawIOBase.read` (or - :meth:`~RawIOBase.readinto`) method. This can be useful if you are - implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, calling :meth:`~RawIOBase.readinto` + which may retry if :py:const:`~errno.EINTR` is encountered per + :pep:`475`. If *size* is ``-1`` or not provided, the implementation will + choose an arbitrary value for *size*. - If *size* is ``-1`` (the default), an arbitrary number of bytes are - returned (more than zero unless EOF is reached). + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: readinto(b, /) @@ -767,34 +777,21 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. + Return bytes from the stream without advancing the position. The number of + bytes returned may be less or more than requested. If the underlying raw + stream is non-blocking and the operation would block, returns empty bytes. .. method:: read(size=-1, /) - Read and return *size* bytes, or if *size* is not given or negative, until - EOF or if the read call would block in non-blocking mode. - - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read` .. method:: read1(size=-1, /) - Read and return up to *size* bytes with only one call on the raw stream. - If at least one byte is buffered, only buffered bytes are returned. - Otherwise, one raw stream read call is made. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read1` .. versionchanged:: 3.7 The *size* argument is now optional. - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. - .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE) A buffered binary stream providing higher-level access to a writeable, non @@ -826,8 +823,8 @@ than raw I/O does. Write the :term:`bytes-like object`, *b*, and return the number of bytes written. When in non-blocking mode, a - :exc:`BlockingIOError` is raised if the buffer needs to be written out but - the raw stream blocks. + :exc:`BlockingIOError` with :attr:`BlockingIOError.characters_written` set + is raised if the buffer needs to be written out but the raw stream blocks. .. class:: BufferedRandom(raw, buffer_size=DEFAULT_BUFFER_SIZE) From 1a07a01014bde23acd2684916ef38dc0cd73c2de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 21 May 2025 19:10:31 +0200 Subject: [PATCH 275/989] gh-132124: improve safety nets for creating AF_UNIX socket files (GH-134085) * ensure that we can create AF_UNIX socket files * emit a warning if system-wide temporary directory is used --- Lib/multiprocessing/connection.py | 2 +- Lib/multiprocessing/util.py | 79 ++++++++++++++++++- Lib/tempfile.py | 5 +- ...-05-16-12-40-37.gh-issue-132124.T_5Odx.rst | 6 ++ 4 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 5f288a8d393240..fc00d2861260a8 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -76,7 +76,7 @@ def arbitrary_address(family): if family == 'AF_INET': return ('localhost', 0) elif family == 'AF_UNIX': - return tempfile.mktemp(prefix='listener-', dir=util.get_temp_dir()) + return tempfile.mktemp(prefix='sock-', dir=util.get_temp_dir()) elif family == 'AF_PIPE': return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' % (os.getpid(), next(_mmap_counter)), dir="") diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index b7192042b9cf47..a1a537dd48dea7 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -19,7 +19,7 @@ from . import process __all__ = [ - 'sub_debug', 'debug', 'info', 'sub_warning', 'get_logger', + 'sub_debug', 'debug', 'info', 'sub_warning', 'warn', 'get_logger', 'log_to_stderr', 'get_temp_dir', 'register_after_fork', 'is_exiting', 'Finalize', 'ForkAwareThreadLock', 'ForkAwareLocal', 'close_all_fds_except', 'SUBDEBUG', 'SUBWARNING', @@ -34,6 +34,7 @@ DEBUG = 10 INFO = 20 SUBWARNING = 25 +WARNING = 30 LOGGER_NAME = 'multiprocessing' DEFAULT_LOGGING_FORMAT = '[%(levelname)s/%(processName)s] %(message)s' @@ -53,6 +54,10 @@ def info(msg, *args): if _logger: _logger.log(INFO, msg, *args, stacklevel=2) +def warn(msg, *args): + if _logger: + _logger.log(WARNING, msg, *args, stacklevel=2) + def sub_warning(msg, *args): if _logger: _logger.log(SUBWARNING, msg, *args, stacklevel=2) @@ -121,6 +126,21 @@ def is_abstract_socket_namespace(address): # Function returning a temp directory which will be removed on exit # +# Maximum length of a socket file path is usually between 92 and 108 [1], +# but Linux is known to use a size of 108 [2]. BSD-based systems usually +# use a size of 104 or 108 and Windows does not create AF_UNIX sockets. +# +# [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html +# [2]: https://man7.org/linux/man-pages/man7/unix.7.html. + +if sys.platform == 'linux': + _SUN_PATH_MAX = 108 +elif sys.platform.startswith(('openbsd', 'freebsd')): + _SUN_PATH_MAX = 104 +else: + # On Windows platforms, we do not create AF_UNIX sockets. + _SUN_PATH_MAX = None if os.name == 'nt' else 92 + def _remove_temp_dir(rmtree, tempdir): rmtree(tempdir) @@ -130,12 +150,67 @@ def _remove_temp_dir(rmtree, tempdir): if current_process is not None: current_process._config['tempdir'] = None +def _get_base_temp_dir(tempfile): + """Get a temporary directory where socket files will be created. + + To prevent additional imports, pass a pre-imported 'tempfile' module. + """ + if os.name == 'nt': + return None + # Most of the time, the default temporary directory is /tmp. Thus, + # listener sockets files "$TMPDIR/pymp-XXXXXXXX/sock-XXXXXXXX" do + # not have a path length exceeding SUN_PATH_MAX. + # + # If users specify their own temporary directory, we may be unable + # to create those files. Therefore, we fall back to the system-wide + # temporary directory /tmp, assumed to exist on POSIX systems. + # + # See https://github.com/python/cpython/issues/132124. + base_tempdir = tempfile.gettempdir() + # Files created in a temporary directory are suffixed by a string + # generated by tempfile._RandomNameSequence, which, by design, + # is 8 characters long. + # + # Thus, the length of socket filename will be: + # + # len(base_tempdir + '/pymp-XXXXXXXX' + '/sock-XXXXXXXX') + sun_path_len = len(base_tempdir) + 14 + 14 + if sun_path_len <= _SUN_PATH_MAX: + return base_tempdir + # Fallback to the default system-wide temporary directory. + # This ignores user-defined environment variables. + # + # On POSIX systems, /tmp MUST be writable by any application [1]. + # We however emit a warning if this is not the case to prevent + # obscure errors later in the execution. + # + # On some legacy systems, /var/tmp and /usr/tmp can be present + # and will be used instead. + # + # [1]: https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s18.html + dirlist = ['/tmp', '/var/tmp', '/usr/tmp'] + try: + base_system_tempdir = tempfile._get_default_tempdir(dirlist) + except FileNotFoundError: + warn("Process-wide temporary directory %s will not be usable for " + "creating socket files and no usable system-wide temporary " + "directory was found in %s", base_tempdir, dirlist) + # At this point, the system-wide temporary directory is not usable + # but we may assume that the user-defined one is, even if we will + # not be able to write socket files out there. + return base_tempdir + warn("Ignoring user-defined temporary directory: %s", base_tempdir) + # at most max(map(len, dirlist)) + 14 + 14 = 36 characters + assert len(base_system_tempdir) + 14 + 14 <= _SUN_PATH_MAX + return base_system_tempdir + def get_temp_dir(): # get name of a temp directory which will be automatically cleaned up tempdir = process.current_process()._config.get('tempdir') if tempdir is None: import shutil, tempfile - tempdir = tempfile.mkdtemp(prefix='pymp-') + base_tempdir = _get_base_temp_dir(tempfile) + tempdir = tempfile.mkdtemp(prefix='pymp-', dir=base_tempdir) info('created temp directory %s', tempdir) # keep a strong reference to shutil.rmtree(), since the finalizer # can be called late during Python shutdown diff --git a/Lib/tempfile.py b/Lib/tempfile.py index cadb0bed3cce3b..5e3ccab5f48502 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -180,7 +180,7 @@ def _candidate_tempdir_list(): return dirlist -def _get_default_tempdir(): +def _get_default_tempdir(dirlist=None): """Calculate the default directory to use for temporary files. This routine should be called exactly once. @@ -190,7 +190,8 @@ def _get_default_tempdir(): service, the name of the test file must be randomized.""" namer = _RandomNameSequence() - dirlist = _candidate_tempdir_list() + if dirlist is None: + dirlist = _candidate_tempdir_list() for dir in dirlist: if dir != _os.curdir: diff --git a/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst b/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst new file mode 100644 index 00000000000000..acf3577ece4e9c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst @@ -0,0 +1,6 @@ +On POSIX-compliant systems, :func:`!multiprocessing.util.get_temp_dir` now +ignores :envvar:`TMPDIR` (and similar environment variables) if the path +length of ``AF_UNIX`` socket files exceeds the platform-specific maximum +length when using the :ref:`forkserver +` start method. Patch by Bénédikt +Tran. From fb68776591485cacd63a97a342bf294a6ae6d4e4 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 21 May 2025 15:09:34 -0400 Subject: [PATCH 276/989] gh-132983: Fix refleak in zstd dictionary functions (gh-134459) --- Modules/_zstd/_zstdmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index b2e4f95b906356..17d3bff1e98769 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -251,7 +251,7 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, &chunk_sizes); if (chunks_number < 0) { - return NULL; + goto error; } /* Allocate dict buffer */ @@ -333,7 +333,7 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, &chunk_sizes); if (chunks_number < 0) { - return NULL; + goto error; } /* Allocate dict buffer */ From a66bae8bb52721ea597ade6222f83876f9e939ba Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 21 May 2025 14:16:55 -0600 Subject: [PATCH 277/989] gh-132775: Use _PyFunction_VerifyStateless() and _PyCode_VerifyStateless() (gh-134439) --- Include/internal/pycore_pyerrors.h | 4 +- Lib/test/test__interpreters.py | 3 +- Modules/_interpretersmodule.c | 139 ++++++++++++++--------------- 3 files changed, 70 insertions(+), 76 deletions(-) diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index f357b88e220e6e..2c2048f7e1272a 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -94,13 +94,13 @@ extern void _PyErr_Fetch( PyObject **value, PyObject **traceback); -extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); +PyAPI_FUNC(PyObject*) _PyErr_GetRaisedException(PyThreadState *tstate); PyAPI_FUNC(int) _PyErr_ExceptionMatches( PyThreadState *tstate, PyObject *exc); -extern void _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); +PyAPI_FUNC(void) _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); extern void _PyErr_Restore( PyThreadState *tstate, diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index 0c43f46300f67d..63fdaad8de7ef5 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -1054,7 +1054,7 @@ def test_closure(self): def script(): assert spam - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): _interpreters.run_func(self.id, script) # XXX This hasn't been fixed yet. @@ -1065,6 +1065,7 @@ def script(): with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) + @unittest.skip("we're not quite there yet") def test_args(self): with self.subTest('args'): def script(a, b=0): diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index f3c571e717fd0e..91cd92806206be 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -8,6 +8,8 @@ #include "Python.h" #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() +#include "pycore_function.h" // _PyFunction_VerifyStateless() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -374,34 +376,17 @@ check_code_str(PyUnicodeObject *text) return NULL; } -static const char * -check_code_object(PyCodeObject *code) +#ifndef NDEBUG +static int +code_has_args(PyCodeObject *code) { assert(code != NULL); - if (code->co_argcount > 0 + return (code->co_argcount > 0 || code->co_posonlyargcount > 0 || code->co_kwonlyargcount > 0 - || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) - { - return "arguments not supported"; - } - if (code->co_ncellvars > 0) { - return "closures not supported"; - } - // We trust that no code objects under co_consts have unbound cell vars. - - if (_PyCode_HAS_EXECUTORS(code) || _PyCode_HAS_INSTRUMENTATION(code)) { - return "only basic functions are supported"; - } - if (code->_co_monitoring != NULL) { - return "only basic functions are supported"; - } - if (code->co_extra != NULL) { - return "only basic functions are supported"; - } - - return NULL; + || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)); } +#endif #define RUN_TEXT 1 #define RUN_CODE 2 @@ -429,8 +414,10 @@ get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) flags = RUN_TEXT; } else { - assert(PyCode_Check(arg) - && (check_code_object((PyCodeObject *)arg) == NULL)); + assert(PyCode_Check(arg)); + assert(_PyCode_VerifyStateless( + PyThreadState_Get(), (PyCodeObject *)arg, NULL, NULL, NULL) == 0); + assert(!code_has_args((PyCodeObject *)arg)); flags = RUN_CODE; // Serialize the code object. @@ -949,7 +936,8 @@ Bind the given attributes in the interpreter's __main__ module."); static PyUnicodeObject * -convert_script_arg(PyObject *arg, const char *fname, const char *displayname, +convert_script_arg(PyThreadState *tstate, + PyObject *arg, const char *fname, const char *displayname, const char *expected) { PyUnicodeObject *str = NULL; @@ -968,8 +956,8 @@ convert_script_arg(PyObject *arg, const char *fname, const char *displayname, const char *err = check_code_str(str); if (err != NULL) { Py_DECREF(str); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad script text (%s)", fname, err); + _PyErr_Format(tstate, PyExc_ValueError, + "%.200s(): bad script text (%s)", fname, err); return NULL; } @@ -977,51 +965,44 @@ convert_script_arg(PyObject *arg, const char *fname, const char *displayname, } static PyCodeObject * -convert_code_arg(PyObject *arg, const char *fname, const char *displayname, +convert_code_arg(PyThreadState *tstate, + PyObject *arg, const char *fname, const char *displayname, const char *expected) { - const char *kind = NULL; + PyObject *cause; PyCodeObject *code = NULL; if (PyFunction_Check(arg)) { - if (PyFunction_GetClosure(arg) != NULL) { - PyErr_Format(PyExc_ValueError, - "%.200s(): closures not supported", fname); - return NULL; - } - code = (PyCodeObject *)PyFunction_GetCode(arg); - if (code == NULL) { - if (PyErr_Occurred()) { - // This chains. - PyErr_Format(PyExc_ValueError, - "%.200s(): bad func", fname); - } - else { - PyErr_Format(PyExc_ValueError, - "%.200s(): func.__code__ missing", fname); - } - return NULL; + // For now we allow globals, so we can't use + // _PyFunction_VerifyStateless(). + PyObject *codeobj = PyFunction_GetCode(arg); + if (_PyCode_VerifyStateless( + tstate, (PyCodeObject *)codeobj, NULL, NULL, NULL) < 0) { + goto chained; } - Py_INCREF(code); - kind = "func"; + code = (PyCodeObject *)Py_NewRef(codeobj); } else if (PyCode_Check(arg)) { + if (_PyCode_VerifyStateless( + tstate, (PyCodeObject *)arg, NULL, NULL, NULL) < 0) { + goto chained; + } code = (PyCodeObject *)Py_NewRef(arg); - kind = "code object"; } else { _PyArg_BadArgument(fname, displayname, expected, arg); return NULL; } - const char *err = check_code_object(code); - if (err != NULL) { - Py_DECREF(code); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad %s (%s)", fname, kind, err); - return NULL; - } - return code; + +chained: + cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + _PyArg_BadArgument(fname, displayname, expected, arg); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyException_SetCause(exc, cause); + _PyErr_SetRaisedException(tstate, exc); + return NULL; } static int @@ -1057,12 +1038,14 @@ _interp_exec(PyObject *self, PyInterpreterState *interp, static PyObject * interp_exec(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".exec" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "code", "shared", "restrict", NULL}; PyObject *id, *code; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".exec", kwlist, + "OO|O$p:" FUNCNAME, kwlist, &id, &code, &shared, &restricted)) { return NULL; @@ -1077,12 +1060,12 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) const char *expected = "a string, a function, or a code object"; if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); + code = (PyObject *)convert_script_arg(tstate, code, FUNCNAME, + "argument 2", expected); } else { - code = (PyObject *)convert_code_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); + code = (PyObject *)convert_code_arg(tstate, code, FUNCNAME, + "argument 2", expected); } if (code == NULL) { return NULL; @@ -1096,6 +1079,7 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return excinfo; } Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(exec_doc, @@ -1118,13 +1102,15 @@ is ignored, including its __globals__ dict."); static PyObject * interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_string" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "script", "shared", "restrict", NULL}; PyObject *id, *script; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O$p:" MODULE_NAME_STR ".run_string", - kwlist, &id, &script, &shared, &restricted)) + "OU|O$p:" FUNCNAME, kwlist, + &id, &script, &shared, &restricted)) { return NULL; } @@ -1136,7 +1122,7 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - script = (PyObject *)convert_script_arg(script, MODULE_NAME_STR ".run_string", + script = (PyObject *)convert_script_arg(tstate, script, FUNCNAME, "argument 2", "a string"); if (script == NULL) { return NULL; @@ -1150,6 +1136,7 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return excinfo; } Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_string_doc, @@ -1162,13 +1149,15 @@ Execute the provided string in the identified interpreter.\n\ static PyObject * interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_func" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "func", "shared", "restrict", NULL}; PyObject *id, *func; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".run_func", - kwlist, &id, &func, &shared, &restricted)) + "OO|O$p:" FUNCNAME, kwlist, + &id, &func, &shared, &restricted)) { return NULL; } @@ -1180,7 +1169,7 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyCodeObject *code = convert_code_arg(func, MODULE_NAME_STR ".exec", + PyCodeObject *code = convert_code_arg(tstate, func, FUNCNAME, "argument 2", "a function or a code object"); if (code == NULL) { @@ -1195,6 +1184,7 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return excinfo; } Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_func_doc, @@ -1209,6 +1199,8 @@ are not supported. Methods and other callables are not supported either.\n\ static PyObject * interp_call(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".call" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "callable", "args", "kwargs", "restrict", NULL}; PyObject *id, *callable; @@ -1216,7 +1208,7 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) PyObject *kwargs_obj = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|OO$p:" MODULE_NAME_STR ".call", kwlist, + "OO|OO$p:" FUNCNAME, kwlist, &id, &callable, &args_obj, &kwargs_obj, &restricted)) { @@ -1231,15 +1223,15 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) } if (args_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected args"); + _PyErr_SetString(tstate, PyExc_ValueError, "got unexpected args"); return NULL; } if (kwargs_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected kwargs"); + _PyErr_SetString(tstate, PyExc_ValueError, "got unexpected kwargs"); return NULL; } - PyObject *code = (PyObject *)convert_code_arg(callable, MODULE_NAME_STR ".call", + PyObject *code = (PyObject *)convert_code_arg(tstate, callable, FUNCNAME, "argument 2", "a function"); if (code == NULL) { return NULL; @@ -1253,6 +1245,7 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return excinfo; } Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(call_doc, From b1b8962443e7d418601658a4b05347a5a9161910 Mon Sep 17 00:00:00 2001 From: "Yuichiro Tachibana (Tsuchiya)" Date: Wed, 21 May 2025 19:18:00 -0500 Subject: [PATCH 278/989] gh-127960 Fix the REPL to set the correct namespace by setting the correct `__main__` module (gh-134275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `__main__` module imported in the `_pyrepl` module points to the `_pyrepl` module itself when the interpreter was launched without `-m` option and didn't execute a module, while it's an unexpected behavior that `__main__` can be `_pyrepl` and relative imports such as `from . import *` works based on the `_pyrepl` module. Co-authored-by: Łukasz Langa --- Lib/_pyrepl/_module_completer.py | 4 +- Lib/_pyrepl/main.py | 11 ++- Lib/_pyrepl/readline.py | 1 + Lib/test/support/__init__.py | 6 -- Lib/test/test_pyrepl/test_pyrepl.py | 78 +++++++++++++++---- ...-05-21-18-02-56.gh-issue-127960.W3J_2X.rst | 3 + Modules/main.c | 16 +++- 7 files changed, 85 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-18-02-56.gh-issue-127960.W3J_2X.rst diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 9aafb55090e2ce..494a501101a9b2 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -17,8 +17,8 @@ def make_default_module_completer() -> ModuleCompleter: - # Inside pyrepl, __package__ is set to '_pyrepl' - return ModuleCompleter(namespace={'__package__': '_pyrepl'}) + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) class ModuleCompleter: diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py index a6f824dcc4ad14..447eb1e551e774 100644 --- a/Lib/_pyrepl/main.py +++ b/Lib/_pyrepl/main.py @@ -1,6 +1,7 @@ import errno import os import sys +import types CAN_USE_PYREPL: bool @@ -29,12 +30,10 @@ def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): print(FAIL_REASON, file=sys.stderr) return sys._baserepl() - if mainmodule: - namespace = mainmodule.__dict__ - else: - import __main__ - namespace = __main__.__dict__ - namespace.pop("__pyrepl_interactive_console", None) + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ # sys._baserepl() above does this internally, we do it here startup_path = os.getenv("PYTHONSTARTUP") diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 560a9db192169e..572eee520e53f3 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -606,6 +606,7 @@ def _setup(namespace: Mapping[str, Any]) -> None: # set up namespace in rlcompleter, which requires it to be a bona fide dict if not isinstance(namespace, dict): namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) _wrapper.config.readline_completer = RLCompleter(namespace).complete # this is not really what readline.c does. Better than nothing I guess diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 9b6e80fdad9747..b7cd7940eb15b3 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2929,12 +2929,6 @@ def make_clean_env() -> dict[str, str]: return clean_env -def initialized_with_pyrepl(): - """Detect whether PyREPL was used during Python initialization.""" - # If the main module has a __file__ attribute it's a Python module, which means PyREPL. - return hasattr(sys.modules["__main__"], "__file__") - - WINDOWS_STATUS = { 0xC0000005: "STATUS_ACCESS_VIOLATION", 0xC00000FD: "STATUS_STACK_OVERFLOW", diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 29762232d43b89..abb4bd1bc25fb1 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -926,6 +926,7 @@ def tearDown(self): def prepare_reader(self, events, namespace): console = FakeConsole(events) config = ReadlineConfig() + config.module_completer = ModuleCompleter(namespace) config.readline_completer = rlcompleter.Completer(namespace).complete reader = ReadlineAlikeReader(console=console, config=config) return reader @@ -1022,13 +1023,15 @@ def test_builtin_completion_top_level(self): def test_relative_import_completions(self): cases = ( - ("from .readl\t\n", "from .readline"), - ("from . import readl\t\n", "from . import readline"), + (None, "from .readl\t\n", "from .readl"), + (None, "from . import readl\t\n", "from . import readl"), + ("_pyrepl", "from .readl\t\n", "from .readline"), + ("_pyrepl", "from . import readl\t\n", "from . import readline"), ) - for code, expected in cases: + for package, code, expected in cases: with self.subTest(code=code): events = code_to_events(code) - reader = self.prepare_reader(events, namespace={}) + reader = self.prepare_reader(events, namespace={"__package__": package}) output = reader.readline() self.assertEqual(output, expected) @@ -1397,7 +1400,7 @@ def _assertMatchOK( ) @force_not_colorized - def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False): + def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False, pythonstartup=False): clean_env = make_clean_env() clean_env["NO_COLOR"] = "1" # force_not_colorized doesn't touch subprocesses @@ -1406,9 +1409,13 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False blue.mkdir() mod = blue / "calx.py" mod.write_text("FOO = 42", encoding="utf-8") + startup = blue / "startup.py" + startup.write_text("BAR = 64", encoding="utf-8") commands = [ "print(f'^{" + var + "=}')" for var in expectations ] + ["exit()"] + if pythonstartup: + clean_env["PYTHONSTARTUP"] = str(startup) if as_file and as_module: self.fail("as_file and as_module are mutually exclusive") elif as_file: @@ -1427,7 +1434,13 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False skip=True, ) else: - self.fail("Choose one of as_file or as_module") + output, exit_code = self.run_repl( + commands, + cmdline_args=[], + env=clean_env, + cwd=td, + skip=True, + ) self.assertEqual(exit_code, 0) for var, expected in expectations.items(): @@ -1440,6 +1453,23 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False self.assertNotIn("Exception", output) self.assertNotIn("Traceback", output) + def test_globals_initialized_as_default(self): + expectations = { + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations) + + def test_globals_initialized_from_pythonstartup(self): + expectations = { + "BAR": "64", + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations, pythonstartup=True) + def test_inspect_keeps_globals_from_inspected_file(self): expectations = { "FOO": "42", @@ -1449,6 +1479,16 @@ def test_inspect_keeps_globals_from_inspected_file(self): } self._run_repl_globals_test(expectations, as_file=True) + def test_inspect_keeps_globals_from_inspected_file_with_pythonstartup(self): + expectations = { + "FOO": "42", + "BAR": "64", + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations, as_file=True, pythonstartup=True) + def test_inspect_keeps_globals_from_inspected_module(self): expectations = { "FOO": "42", @@ -1458,26 +1498,32 @@ def test_inspect_keeps_globals_from_inspected_module(self): } self._run_repl_globals_test(expectations, as_module=True) + def test_inspect_keeps_globals_from_inspected_module_with_pythonstartup(self): + expectations = { + "FOO": "42", + "BAR": "64", + "__name__": "'__main__'", + "__package__": "'blue'", + "__file__": re.compile(r"^'.*calx.py'$"), + } + self._run_repl_globals_test(expectations, as_module=True, pythonstartup=True) + @force_not_colorized def test_python_basic_repl(self): env = os.environ.copy() - commands = ("from test.support import initialized_with_pyrepl\n" - "initialized_with_pyrepl()\n" - "exit()\n") - + pyrepl_commands = "clear\nexit()\n" env.pop("PYTHON_BASIC_REPL", None) - output, exit_code = self.run_repl(commands, env=env, skip=True) + output, exit_code = self.run_repl(pyrepl_commands, env=env, skip=True) self.assertEqual(exit_code, 0) - self.assertIn("True", output) - self.assertNotIn("False", output) self.assertNotIn("Exception", output) + self.assertNotIn("NameError", output) self.assertNotIn("Traceback", output) + basic_commands = "help\nexit()\n" env["PYTHON_BASIC_REPL"] = "1" - output, exit_code = self.run_repl(commands, env=env) + output, exit_code = self.run_repl(basic_commands, env=env) self.assertEqual(exit_code, 0) - self.assertIn("False", output) - self.assertNotIn("True", output) + self.assertIn("Type help() for interactive help", output) self.assertNotIn("Exception", output) self.assertNotIn("Traceback", output) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-18-02-56.gh-issue-127960.W3J_2X.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-18-02-56.gh-issue-127960.W3J_2X.rst new file mode 100644 index 00000000000000..730d8a5af51f54 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-18-02-56.gh-issue-127960.W3J_2X.rst @@ -0,0 +1,3 @@ +PyREPL interactive shell no longer starts with ``__package__`` and +``__file__`` global names set to ``_pyrepl`` package internals. Contributed +by Yuichiro Tachibana. diff --git a/Modules/main.c b/Modules/main.c index 2be194bdadf7d0..2d7ed25f5f9790 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -269,13 +269,14 @@ pymain_run_command(wchar_t *command) static int -pymain_start_pyrepl_no_main(void) +pymain_start_pyrepl(int pythonstartup) { int res = 0; PyObject *console = NULL; PyObject *empty_tuple = NULL; PyObject *kwargs = NULL; PyObject *console_result = NULL; + PyObject *main_module = NULL; PyObject *pyrepl = PyImport_ImportModule("_pyrepl.main"); if (pyrepl == NULL) { @@ -299,7 +300,13 @@ pymain_start_pyrepl_no_main(void) res = pymain_exit_err_print(); goto done; } - if (!PyDict_SetItemString(kwargs, "pythonstartup", _PyLong_GetOne())) { + main_module = PyImport_AddModuleRef("__main__"); + if (main_module == NULL) { + res = pymain_exit_err_print(); + goto done; + } + if (!PyDict_SetItemString(kwargs, "mainmodule", main_module) + && !PyDict_SetItemString(kwargs, "pythonstartup", pythonstartup ? Py_True : Py_False)) { console_result = PyObject_Call(console, empty_tuple, kwargs); if (console_result == NULL) { res = pymain_exit_err_print(); @@ -311,6 +318,7 @@ pymain_start_pyrepl_no_main(void) Py_XDECREF(empty_tuple); Py_XDECREF(console); Py_XDECREF(pyrepl); + Py_XDECREF(main_module); return res; } @@ -562,7 +570,7 @@ pymain_run_stdin(PyConfig *config) int run = PyRun_AnyFileExFlags(stdin, "", 0, &cf); return (run != 0); } - return pymain_run_module(L"_pyrepl", 0); + return pymain_start_pyrepl(0); } @@ -595,7 +603,7 @@ pymain_repl(PyConfig *config, int *exitcode) *exitcode = (run != 0); return; } - int run = pymain_start_pyrepl_no_main(); + int run = pymain_start_pyrepl(1); *exitcode = (run != 0); return; } From 518c95b5529ed3379b5a3065b09f71411efe72fb Mon Sep 17 00:00:00 2001 From: Marcin Bachry Date: Thu, 22 May 2025 04:38:01 +0200 Subject: [PATCH 279/989] gh-127840: pass flags and address from send_fds (GH-127841) socket: pass flags and address from send_fds Co-authored-by: Peter Bierma --- Lib/socket.py | 2 +- Lib/test/test_socket.py | 24 +++++++++++++++++++ ...-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst diff --git a/Lib/socket.py b/Lib/socket.py index 727b0e75f03595..2cbaf221a59dea 100644 --- a/Lib/socket.py +++ b/Lib/socket.py @@ -563,7 +563,7 @@ def send_fds(sock, buffers, fds, flags=0, address=None): import array return sock.sendmsg(buffers, [(_socket.SOL_SOCKET, - _socket.SCM_RIGHTS, array.array("i", fds))]) + _socket.SCM_RIGHTS, array.array("i", fds))], flags, address) __all__.append("send_fds") if hasattr(_socket.socket, "recvmsg"): diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 03c54151a2218f..04dfb682ec6831 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -7366,6 +7366,30 @@ def close_fds(fds): data = os.read(rfd, 100) self.assertEqual(data, str(index).encode()) + def testSendAndRecvFdsByAddress(self): + rfd, wfd = os.pipe() + self.addCleanup(os.close, rfd) + self.addCleanup(os.close, wfd) + + sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + address = socket_helper.create_unix_domain_name() + self.addCleanup(os_helper.unlink, address) + socket_helper.bind_unix_socket(sock, address) + + socket.send_fds(sock, [MSG], [rfd], 0, address) + + # request more data and file descriptors than expected + msg, (rfd2,), flags, addr = socket.recv_fds(sock, len(MSG) * 2, 2) + self.addCleanup(os.close, rfd2) + self.assertEqual(msg, MSG) + self.assertEqual(flags, 0) + self.assertEqual(addr, address) + + # test that the file descriptor is connected + os.write(wfd, b'data') + data = os.read(rfd2, 100) + self.assertEqual(data, b'data') + class FreeThreadingTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst b/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst new file mode 100644 index 00000000000000..51eaaa96ebaa07 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst @@ -0,0 +1 @@ +Fix :func:`socket.send_fds` ignoring flags and address parameters. From f3fc0c16e08b317cb201cf1073e934e6909f1251 Mon Sep 17 00:00:00 2001 From: Mike Salvatore Date: Wed, 21 May 2025 22:48:10 -0400 Subject: [PATCH 280/989] gh-134062: Fix hash collisions in IPv4Network and IPv6Network (GH-134063) gh-134062: Fix hash collisions in IPv4Network and IPv6Network gh-134062: Add hash collision regression test --- Lib/ipaddress.py | 2 +- Lib/test/test_ipaddress.py | 28 +++++++++++++++++++ ...-05-15-14-27-01.gh-issue-134062.fRbJet.rst | 3 ++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-15-14-27-01.gh-issue-134062.fRbJet.rst diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 703fa289dda1fb..d8a84f33264dc5 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -729,7 +729,7 @@ def __eq__(self, other): return NotImplemented def __hash__(self): - return hash(int(self.network_address) ^ int(self.netmask)) + return hash((int(self.network_address), int(self.netmask))) def __contains__(self, other): # always false if one is v4 and the other is v6. diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index d04012d1afd540..a06608d0016253 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -2762,6 +2762,34 @@ def testV6HashIsNotConstant(self): ipv6_address2 = ipaddress.IPv6Interface("2001:658:22a:cafe:200:0:0:2") self.assertNotEqual(ipv6_address1.__hash__(), ipv6_address2.__hash__()) + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV4HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.255/32").__hash__(), + ipaddress.IPv4Network("192.168.1.0/24").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("172.24.255.0/24").__hash__(), + ipaddress.IPv4Network("172.24.0.0/16").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.87/32").__hash__(), + ipaddress.IPv4Network("192.168.1.86/31").__hash__() + ) + + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV6HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv6Network("fe80::/64").__hash__(), + ipaddress.IPv6Network("fe80::ffff:ffff:ffff:0/112").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("10.0.0.0/8").__hash__(), + ipaddress.IPv6Network( + "ffff:ffff:ffff:ffff:ffff:ffff:aff:0/112" + ).__hash__() + ) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-05-15-14-27-01.gh-issue-134062.fRbJet.rst b/Misc/NEWS.d/next/Library/2025-05-15-14-27-01.gh-issue-134062.fRbJet.rst new file mode 100644 index 00000000000000..f62a3ec480193d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-15-14-27-01.gh-issue-134062.fRbJet.rst @@ -0,0 +1,3 @@ +:mod:`ipaddress`: fix collisions in :meth:`~object.__hash__` for +:class:`~ipaddress.IPv4Network` and :class:`~ipaddress.IPv6Network` +objects. From 296a66051ede5cc112ca38d17304e518ffb02e23 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Wed, 21 May 2025 23:38:44 -0400 Subject: [PATCH 281/989] gh-127840: Revert "gh-127840: pass flags and address from send_fds (GH-127841)" (#134482) Revert "gh-127840: pass flags and address from send_fds (GH-127841)" This reverts commit 518c95b5529ed3379b5a3065b09f71411efe72fb. --- Lib/socket.py | 2 +- Lib/test/test_socket.py | 24 ------------------- ...-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst diff --git a/Lib/socket.py b/Lib/socket.py index 2cbaf221a59dea..727b0e75f03595 100644 --- a/Lib/socket.py +++ b/Lib/socket.py @@ -563,7 +563,7 @@ def send_fds(sock, buffers, fds, flags=0, address=None): import array return sock.sendmsg(buffers, [(_socket.SOL_SOCKET, - _socket.SCM_RIGHTS, array.array("i", fds))], flags, address) + _socket.SCM_RIGHTS, array.array("i", fds))]) __all__.append("send_fds") if hasattr(_socket.socket, "recvmsg"): diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 04dfb682ec6831..03c54151a2218f 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -7366,30 +7366,6 @@ def close_fds(fds): data = os.read(rfd, 100) self.assertEqual(data, str(index).encode()) - def testSendAndRecvFdsByAddress(self): - rfd, wfd = os.pipe() - self.addCleanup(os.close, rfd) - self.addCleanup(os.close, wfd) - - sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) - address = socket_helper.create_unix_domain_name() - self.addCleanup(os_helper.unlink, address) - socket_helper.bind_unix_socket(sock, address) - - socket.send_fds(sock, [MSG], [rfd], 0, address) - - # request more data and file descriptors than expected - msg, (rfd2,), flags, addr = socket.recv_fds(sock, len(MSG) * 2, 2) - self.addCleanup(os.close, rfd2) - self.assertEqual(msg, MSG) - self.assertEqual(flags, 0) - self.assertEqual(addr, address) - - # test that the file descriptor is connected - os.write(wfd, b'data') - data = os.read(rfd2, 100) - self.assertEqual(data, b'data') - class FreeThreadingTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst b/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst deleted file mode 100644 index 51eaaa96ebaa07..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-12-11-20-15-00.gh-issue-127840.pt8fiQ.rst +++ /dev/null @@ -1 +0,0 @@ -Fix :func:`socket.send_fds` ignoring flags and address parameters. From 979d81a17905e922d32fb1671f9ed394e0ffbda6 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 21 May 2025 23:49:07 -0400 Subject: [PATCH 282/989] gh-134309: Add ``github.actor`` to the GitHub Actions concurrency key (#134310) When inexperienced users create a PR from their default branch, all of the concurrency keys collide as there is no namespacing. This becomes an issue at events with many new contributors, where workflow runs are cancelled on other pull requests. Disambiguate by adding the username of the relevant 'actor' to the concurrency key. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko Co-authored-by: C.A.M. Gerlach Authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- .github/workflows/build.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b192508c78685c..54ebc914b46821 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,13 @@ permissions: contents: read concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-reusable + # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#concurrency + # 'group' must be a key uniquely representing a PR or push event. + # github.workflow is the workflow name + # github.actor is the user invoking the workflow + # github.head_ref is the source branch of the PR or otherwise blank + # github.run_id is a unique number for the current run + group: ${{ github.workflow }}-${{ github.actor }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: From 7309eb60c077836b4b05a86ed23c710dbff737ec Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 22 May 2025 00:01:46 -0700 Subject: [PATCH 283/989] gh-131357: Add some extra tests for empty bytes and bytearray (#134458) --- Lib/test/test_capi/test_bytearray.py | 6 ++++++ Lib/test/test_capi/test_bytes.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/Lib/test/test_capi/test_bytearray.py b/Lib/test/test_capi/test_bytearray.py index dfa98de9f007d8..52565ea34c61b8 100644 --- a/Lib/test/test_capi/test_bytearray.py +++ b/Lib/test/test_capi/test_bytearray.py @@ -66,6 +66,7 @@ def test_fromobject(self): # Test PyByteArray_FromObject() fromobject = _testlimitedcapi.bytearray_fromobject + self.assertEqual(fromobject(b''), bytearray(b'')) self.assertEqual(fromobject(b'abc'), bytearray(b'abc')) self.assertEqual(fromobject(bytearray(b'abc')), bytearray(b'abc')) self.assertEqual(fromobject(ByteArraySubclass(b'abc')), bytearray(b'abc')) @@ -115,6 +116,7 @@ def test_concat(self): self.assertEqual(concat(b'abc', bytearray(b'def')), bytearray(b'abcdef')) self.assertEqual(concat(bytearray(b'abc'), b''), bytearray(b'abc')) self.assertEqual(concat(b'', bytearray(b'def')), bytearray(b'def')) + self.assertEqual(concat(bytearray(b''), bytearray(b'')), bytearray(b'')) self.assertEqual(concat(memoryview(b'xabcy')[1:4], b'def'), bytearray(b'abcdef')) self.assertEqual(concat(b'abc', memoryview(b'xdefy')[1:4]), @@ -150,6 +152,10 @@ def test_resize(self): self.assertEqual(resize(ba, 0), 0) self.assertEqual(ba, bytearray()) + ba = bytearray(b'') + self.assertEqual(resize(ba, 0), 0) + self.assertEqual(ba, bytearray()) + ba = ByteArraySubclass(b'abcdef') self.assertEqual(resize(ba, 3), 0) self.assertEqual(ba, bytearray(b'abc')) diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 86d5692893fc98..bc820bd68d9e21 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -81,6 +81,7 @@ def test_fromobject(self): # Test PyBytes_FromObject() fromobject = _testlimitedcapi.bytes_fromobject + self.assertEqual(fromobject(b''), b'') self.assertEqual(fromobject(b'abc'), b'abc') self.assertEqual(fromobject(bytearray(b'abc')), b'abc') self.assertEqual(fromobject(BytesSubclass(b'abc')), b'abc') @@ -138,6 +139,7 @@ def test_repr(self): # Test PyBytes_Repr() bytes_repr = _testlimitedcapi.bytes_repr + self.assertEqual(bytes_repr(b'', 0), r"""b''""") self.assertEqual(bytes_repr(b'''abc''', 0), r"""b'abc'""") self.assertEqual(bytes_repr(b'''abc''', 1), r"""b'abc'""") self.assertEqual(bytes_repr(b'''a'b"c"d''', 0), r"""b'a\'b"c"d'""") @@ -197,6 +199,7 @@ def test_decodeescape(self): """Test PyBytes_DecodeEscape()""" decodeescape = _testlimitedcapi.bytes_decodeescape + self.assertEqual(decodeescape(b''), b'') self.assertEqual(decodeescape(b'abc'), b'abc') self.assertEqual(decodeescape(br'\t\n\r\x0b\x0c\x00\\\'\"'), b'''\t\n\r\v\f\0\\'"''') From 1f0a294e8c2ff009c6b74ca5aa71da6269aec0dd Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 22 May 2025 09:36:37 +0200 Subject: [PATCH 284/989] Add notes on nogil & reinitialization to the Opt-Out section in Module Isolation HOWTO (GH-134141) Co-authored-by: Sam Gross --- Doc/howto/isolating-extensions.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index a636e06bda8344..5513cd7367519f 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -215,21 +215,36 @@ multiple interpreters correctly. If this is not yet the case for your module, you can explicitly make your module loadable only once per process. For example:: + // A process-wide flag static int loaded = 0; + // Mutex to provide thread safety (only needed for free-threaded Python) + static PyMutex modinit_mutex = {0}; + static int exec_module(PyObject* module) { + PyMutex_Lock(&modinit_mutex); if (loaded) { + PyMutex_Unlock(&modinit_mutex); PyErr_SetString(PyExc_ImportError, "cannot load module more than once per process"); return -1; } loaded = 1; + PyMutex_Unlock(&modinit_mutex); // ... rest of initialization } +If your module's :c:member:`PyModuleDef.m_clear` function is able to prepare +for future re-initialization, it should clear the ``loaded`` flag. +In this case, your module won't support multiple instances existing +*concurrently*, but it will, for example, support being loaded after +Python runtime shutdown (:c:func:`Py_FinalizeEx`) and re-initialization +(:c:func:`Py_Initialize`). + + Module State Access from Functions ---------------------------------- From 29f6dc6323759ab89ddc33096bf075037db5de5a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 22 May 2025 09:54:04 +0100 Subject: [PATCH 285/989] GH-133932: Tagged ints are heap safe (GH-134244) --- Include/internal/pycore_stackref.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 3ead6bc63c1d12..40ec00c8119108 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -238,6 +238,7 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #else #define Py_INT_TAG 3 +#define Py_TAG_REFCNT 1 static inline bool PyStackRef_IsTaggedInt(_PyStackRef i) @@ -263,7 +264,7 @@ PyStackRef_UntagInt(_PyStackRef i) #ifdef Py_GIL_DISABLED -#define Py_TAG_DEFERRED (1) +#define Py_TAG_DEFERRED Py_TAG_REFCNT #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) @@ -441,14 +442,13 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) /* References to immortal objects always have their tag bit set to Py_TAG_REFCNT * as they can (must) have their reclamation deferred */ -#define Py_TAG_BITS 1 -#define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 #if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) -#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_REFCNT))) #define PyStackRef_NULL_BITS Py_TAG_REFCNT static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; @@ -528,7 +528,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); #if SIZEOF_VOID_P > 4 - unsigned int tag = obj->ob_flags & Py_TAG_BITS; + unsigned int tag = obj->ob_flags & Py_TAG_REFCNT; #else unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; #endif @@ -547,12 +547,6 @@ PyStackRef_FromPyObjectStealMortal(PyObject *obj) return ref; } -// Check if a stackref is exactly the same as another stackref, including the -// the deferred bit. This can only be used safely if you know that the deferred -// bits of `a` and `b` match. -#define PyStackRef_IsExactly(a, b) \ - (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) - static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -604,7 +598,7 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0 || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); + return (ref.bits & Py_TAG_BITS) != Py_TAG_REFCNT || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); } static inline _PyStackRef @@ -679,7 +673,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT))) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) From fbe7b87c0660e14075bc8e3c1b35b625da751316 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 22 May 2025 09:54:35 +0100 Subject: [PATCH 286/989] GH-131688: Mark instructions and uops with DECREF_INPUTS as escaping. (GH-133501) --- Include/internal/pycore_opcode_metadata.h | 24 +++++++++++------------ Include/internal/pycore_uop_metadata.h | 20 +++++++++---------- Tools/cases_generator/analyzer.py | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index b7787653d257cc..39d6a912a5437f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1083,7 +1083,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_LIST_SLICE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1091,8 +1091,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BUILD_MAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_SET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BUILD_SLICE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, - [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [BUILD_SLICE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TEMPLATE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, @@ -1188,7 +1188,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1197,8 +1197,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [LIST_EXTEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, - [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG }, [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, @@ -1208,7 +1208,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1244,8 +1244,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_ITER] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, + [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_TOP] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, @@ -1282,7 +1282,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1290,8 +1290,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5f8c4f3210abff..fc2c4c2924d9ad 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -75,7 +75,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_NOS_LIST] = HAS_EXIT_FLAG, [_GUARD_TOS_LIST] = HAS_EXIT_FLAG, [_GUARD_TOS_SLICE] = HAS_EXIT_FLAG, - [_TO_BOOL_LIST] = 0, + [_TO_BOOL_LIST] = HAS_ESCAPES_FLAG, [_TO_BOOL_NONE] = HAS_EXIT_FLAG, [_GUARD_NOS_UNICODE] = HAS_EXIT_FLAG, [_GUARD_TOS_UNICODE] = HAS_EXIT_FLAG, @@ -103,7 +103,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_SUBSCR_STR_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_TUPLE] = HAS_EXIT_FLAG, [_GUARD_TOS_TUPLE] = HAS_EXIT_FLAG, - [_BINARY_OP_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, + [_BINARY_OP_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_DICT] = HAS_EXIT_FLAG, [_GUARD_TOS_DICT] = HAS_EXIT_FLAG, [_BINARY_OP_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -130,8 +130,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -151,7 +151,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG, [_COPY_FREE_VARS] = HAS_ARG_FLAG, - [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BUILD_INTERPOLATION] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BUILD_TEMPLATE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BUILD_TUPLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, @@ -173,9 +173,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_INSTANCE_VALUE] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_ATTR_SLOT] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_CHECK_ATTR_CLASS] = HAS_EXIT_FLAG, - [_LOAD_ATTR_CLASS] = 0, + [_LOAD_ATTR_CLASS] = HAS_ESCAPES_FLAG, [_LOAD_ATTR_PROPERTY_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_GUARD_DORV_NO_DICT] = HAS_EXIT_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, @@ -185,7 +185,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_COMPARE_OP_STR] = HAS_ARG_FLAG, - [_IS_OP] = HAS_ARG_FLAG, + [_IS_OP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_TOS_ANY_SET] = HAS_DEOPT_FLAG, [_CONTAINS_OP_SET] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -194,7 +194,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_EXC_MATCH] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_IMPORT_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_IMPORT_FROM] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_IS_NONE] = 0, + [_IS_NONE] = HAS_ESCAPES_FLAG, [_GET_LEN] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MATCH_CLASS] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MATCH_MAPPING] = 0, @@ -283,7 +283,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG, [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 825c72eaa1634a..96d64a27e0a6ed 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -832,7 +832,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = bool(escaping_calls) + escapes = bool(escaping_calls) or variable_used(op, "DECREF_INPUTS") pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations return Properties( From bb244fd33d3eb67923ec3253568867bfaed9895f Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 22 May 2025 12:51:02 +0300 Subject: [PATCH 287/989] Consistent sentence case in docs template files (#134412) --- Doc/tools/templates/customsourcelink.html | 6 +++--- Doc/tools/templates/download.html | 2 +- Doc/tools/templates/indexcontent.html | 2 +- Doc/tools/templates/indexsidebar.html | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/tools/templates/customsourcelink.html b/Doc/tools/templates/customsourcelink.html index eb9db9e341bd75..43d3a7a892a880 100644 --- a/Doc/tools/templates/customsourcelink.html +++ b/Doc/tools/templates/customsourcelink.html @@ -1,11 +1,11 @@ {%- if show_source and has_source and sourcename %}
-

{{ _('This Page') }}

+

{{ _('This page') }}

diff --git a/Doc/tools/templates/download.html b/Doc/tools/templates/download.html index 4645f7d394e29e..47a57eb111ba50 100644 --- a/Doc/tools/templates/download.html +++ b/Doc/tools/templates/download.html @@ -27,7 +27,7 @@ {%- endblock -%} {% block body %} -

{% trans %}Download Python {{ dl_version }} Documentation{% endtrans %}

+

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

{% if last_updated %}

{% trans %}Last updated on: {{ last_updated }}.{% endtrans %}

{% endif %} diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index 06a4223643a05a..544cc4234f441e 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -72,7 +72,7 @@

{{ docstitle|e }}

- + diff --git a/Doc/tools/templates/indexsidebar.html b/Doc/tools/templates/indexsidebar.html index eea29e2449a9cf..086f15662cf87b 100644 --- a/Doc/tools/templates/indexsidebar.html +++ b/Doc/tools/templates/indexsidebar.html @@ -9,9 +9,9 @@

{% trans %}Docs by version{% endtrans %}

{% trans %}Other resources{% endtrans %}

From 2602d8ae981c4bae1cada2c174b367d97f712efb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 22 May 2025 13:17:22 +0300 Subject: [PATCH 288/989] gh-71339: Use new assertion methods in tests (GH-129046) --- Lib/test/_test_embed_structseq.py | 2 +- Lib/test/datetimetester.py | 12 ++++---- Lib/test/mapping_tests.py | 4 +-- Lib/test/pickletester.py | 4 +-- Lib/test/support/warnings_helper.py | 3 +- Lib/test/test__osx_support.py | 4 +-- Lib/test/test_abstract_numbers.py | 30 ++++++++++---------- Lib/test/test_argparse.py | 2 +- Lib/test/test_ast/test_ast.py | 18 ++++++------ Lib/test/test_audit.py | 2 +- Lib/test/test_base64.py | 2 +- Lib/test/test_baseexception.py | 8 ++---- Lib/test/test_binascii.py | 6 ++-- Lib/test/test_binop.py | 2 +- Lib/test/test_buffer.py | 4 +-- Lib/test/test_builtin.py | 4 +-- Lib/test/test_bytes.py | 10 +++---- Lib/test/test_bz2.py | 4 +-- Lib/test/test_calendar.py | 2 +- Lib/test/test_call.py | 4 +-- Lib/test/test_clinic.py | 23 ++++++--------- Lib/test/test_cmd_line.py | 11 +++---- Lib/test/test_cmd_line_script.py | 11 ++++--- Lib/test/test_code_module.py | 2 +- Lib/test/test_codecs.py | 2 +- Lib/test/test_compileall.py | 2 +- Lib/test/test_compiler_assemble.py | 2 +- Lib/test/test_contextlib.py | 8 +++--- Lib/test/test_contextlib_async.py | 8 +++--- Lib/test/test_copy.py | 2 +- Lib/test/test_coroutines.py | 2 +- Lib/test/test_curses.py | 2 +- Lib/test/test_dataclasses/__init__.py | 24 ++++++++-------- Lib/test/test_dbm.py | 2 +- Lib/test/test_dbm_sqlite3.py | 4 +-- Lib/test/test_deque.py | 2 +- Lib/test/test_descr.py | 18 ++++++------ Lib/test/test_dict.py | 4 +-- Lib/test/test_dynamicclassattribute.py | 4 +-- Lib/test/test_enum.py | 18 ++++++------ Lib/test/test_errno.py | 6 ++-- Lib/test/test_exception_group.py | 10 +++---- Lib/test/test_exceptions.py | 20 ++++++------- Lib/test/test_fileinput.py | 2 +- Lib/test/test_gc.py | 2 +- Lib/test/test_genericalias.py | 10 +++---- Lib/test/test_genericpath.py | 4 +-- Lib/test/test_gzip.py | 8 +++--- Lib/test/test_hashlib.py | 4 +-- Lib/test/test_inspect/test_inspect.py | 10 +++---- Lib/test/test_int.py | 2 +- Lib/test/test_io.py | 18 ++++++------ Lib/test/test_json/test_fail.py | 2 +- Lib/test/test_json/test_tool.py | 2 +- Lib/test/test_launcher.py | 8 +++--- Lib/test/test_lzma.py | 4 +-- Lib/test/test_memoryio.py | 4 +-- Lib/test/test_ordered_dict.py | 8 +++--- Lib/test/test_os.py | 14 ++++----- Lib/test/test_pathlib/test_pathlib.py | 8 +++--- Lib/test/test_peepholer.py | 2 +- Lib/test/test_peg_generator/test_c_parser.py | 4 +-- Lib/test/test_peg_generator/test_pegen.py | 6 ++-- Lib/test/test_perf_profiler.py | 4 +-- Lib/test/test_pickle.py | 4 +-- Lib/test/test_platform.py | 2 +- Lib/test/test_posix.py | 12 ++++---- Lib/test/test_property.py | 4 +-- Lib/test/test_pulldom.py | 4 +-- Lib/test/test_pyclbr.py | 2 +- Lib/test/test_pydoc/test_pydoc.py | 2 +- Lib/test/test_random.py | 12 ++++---- Lib/test/test_re.py | 8 +++--- Lib/test/test_reprlib.py | 20 ++++++------- Lib/test/test_rlcompleter.py | 2 +- Lib/test/test_runpy.py | 2 +- Lib/test/test_scope.py | 2 +- Lib/test/test_script_helper.py | 3 +- Lib/test/test_set.py | 6 ++-- Lib/test/test_shutil.py | 6 ++-- Lib/test/test_site.py | 22 +++++++------- Lib/test/test_socket.py | 22 +++++++------- Lib/test/test_source_encoding.py | 3 +- Lib/test/test_ssl.py | 10 +++---- Lib/test/test_stat.py | 6 ++-- Lib/test/test_statistics.py | 21 ++++---------- Lib/test/test_structseq.py | 4 +-- Lib/test/test_subprocess.py | 8 +++--- Lib/test/test_super.py | 10 +++---- Lib/test/test_support.py | 4 +-- Lib/test/test_sys.py | 26 ++++++++--------- Lib/test/test_sysconfig.py | 10 +++---- Lib/test/test_tarfile.py | 2 +- Lib/test/test_tempfile.py | 16 +++++------ Lib/test/test_termios.py | 4 +-- Lib/test/test_time.py | 12 ++++---- Lib/test/test_timeit.py | 4 +-- Lib/test/test_tkinter/support.py | 2 +- Lib/test/test_tkinter/test_misc.py | 24 ++++++++-------- Lib/test/test_type_comments.py | 2 +- Lib/test/test_types.py | 22 +++++++------- Lib/test/test_unittest/test_case.py | 8 +++--- Lib/test/test_userdict.py | 2 +- Lib/test/test_venv.py | 4 +-- Lib/test/test_warnings/__init__.py | 10 +++---- Lib/test/test_weakref.py | 6 ++-- Lib/test/test_weakset.py | 4 +-- Lib/test/test_winconsoleio.py | 6 ++-- Lib/test/test_with.py | 2 +- Lib/test/test_wmi.py | 4 +-- Lib/test/test_wsgiref.py | 14 ++++----- Lib/test/test_xml_etree.py | 5 ++-- Lib/test/test_xxlimited.py | 2 +- Lib/test/test_zipapp.py | 6 ++-- Lib/test/test_zipfile/test_core.py | 6 ++-- Lib/test/test_zipimport.py | 4 +-- Lib/test/test_zoneinfo/test_zoneinfo.py | 4 +-- 117 files changed, 407 insertions(+), 445 deletions(-) diff --git a/Lib/test/_test_embed_structseq.py b/Lib/test/_test_embed_structseq.py index 154662efce9412..4cac84d7a469ac 100644 --- a/Lib/test/_test_embed_structseq.py +++ b/Lib/test/_test_embed_structseq.py @@ -11,7 +11,7 @@ def check_structseq(self, obj_type): # ob_refcnt self.assertGreaterEqual(sys.getrefcount(obj_type), 1) # tp_base - self.assertTrue(issubclass(obj_type, tuple)) + self.assertIsSubclass(obj_type, tuple) # tp_bases self.assertEqual(obj_type.__bases__, (tuple,)) # tp_dict diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index df5e45f5f20e32..93b3382b9c654e 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -183,7 +183,7 @@ class NotEnough(tzinfo): def __init__(self, offset, name): self.__offset = offset self.__name = name - self.assertTrue(issubclass(NotEnough, tzinfo)) + self.assertIsSubclass(NotEnough, tzinfo) ne = NotEnough(3, "NotByALongShot") self.assertIsInstance(ne, tzinfo) @@ -232,7 +232,7 @@ def test_pickling_subclass(self): self.assertIs(type(derived), otype) self.assertEqual(derived.utcoffset(None), offset) self.assertEqual(derived.tzname(None), oname) - self.assertFalse(hasattr(derived, 'spam')) + self.assertNotHasAttr(derived, 'spam') def test_issue23600(self): DSTDIFF = DSTOFFSET = timedelta(hours=1) @@ -813,7 +813,7 @@ def test_roundtrip(self): # Verify td -> string -> td identity. s = repr(td) - self.assertTrue(s.startswith('datetime.')) + self.assertStartsWith(s, 'datetime.') s = s[9:] td2 = eval(s) self.assertEqual(td, td2) @@ -1231,7 +1231,7 @@ def test_roundtrip(self): self.theclass.today()): # Verify dt -> string -> date identity. s = repr(dt) - self.assertTrue(s.startswith('datetime.')) + self.assertStartsWith(s, 'datetime.') s = s[9:] dt2 = eval(s) self.assertEqual(dt, dt2) @@ -2218,7 +2218,7 @@ def test_roundtrip(self): self.theclass.now()): # Verify dt -> string -> datetime identity. s = repr(dt) - self.assertTrue(s.startswith('datetime.')) + self.assertStartsWith(s, 'datetime.') s = s[9:] dt2 = eval(s) self.assertEqual(dt, dt2) @@ -3687,7 +3687,7 @@ def test_roundtrip(self): # Verify t -> string -> time identity. s = repr(t) - self.assertTrue(s.startswith('datetime.')) + self.assertStartsWith(s, 'datetime.') s = s[9:] t2 = eval(s) self.assertEqual(t, t2) diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index 9d38da5a86ee5a..20306e1526d7b8 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -70,8 +70,8 @@ def test_read(self): if not d: self.fail("Full mapping must compare to True") # keys(), items(), iterkeys() ... def check_iterandlist(iter, lst, ref): - self.assertTrue(hasattr(iter, '__next__')) - self.assertTrue(hasattr(iter, '__iter__')) + self.assertHasAttr(iter, '__next__') + self.assertHasAttr(iter, '__iter__') x = list(iter) self.assertTrue(set(x)==set(lst)==set(ref)) check_iterandlist(iter(d.keys()), list(d.keys()), diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index dcba6369541049..9d6ae3e4d00ece 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3068,7 +3068,7 @@ def test_proto(self): pickled = self.dumps(None, proto) if proto >= 2: proto_header = pickle.PROTO + bytes([proto]) - self.assertTrue(pickled.startswith(proto_header)) + self.assertStartsWith(pickled, proto_header) else: self.assertEqual(count_opcode(pickle.PROTO, pickled), 0) @@ -5007,7 +5007,7 @@ def test_default_dispatch_table(self): p = self.pickler_class(f, 0) with self.assertRaises(AttributeError): p.dispatch_table - self.assertFalse(hasattr(p, 'dispatch_table')) + self.assertNotHasAttr(p, 'dispatch_table') def test_class_dispatch_table(self): # A dispatch_table attribute can be specified class-wide diff --git a/Lib/test/support/warnings_helper.py b/Lib/test/support/warnings_helper.py index a6e43dff2003b7..5f6f14afd74a6e 100644 --- a/Lib/test/support/warnings_helper.py +++ b/Lib/test/support/warnings_helper.py @@ -23,8 +23,7 @@ def check_syntax_warning(testcase, statement, errtext='', testcase.assertEqual(len(warns), 1, warns) warn, = warns - testcase.assertTrue(issubclass(warn.category, SyntaxWarning), - warn.category) + testcase.assertIsSubclass(warn.category, SyntaxWarning) if errtext: testcase.assertRegex(str(warn.message), errtext) testcase.assertEqual(warn.filename, '') diff --git a/Lib/test/test__osx_support.py b/Lib/test/test__osx_support.py index 53aa26620a6475..0813c4804c1cdc 100644 --- a/Lib/test/test__osx_support.py +++ b/Lib/test/test__osx_support.py @@ -66,8 +66,8 @@ def test__find_build_tool(self): 'cc not found - check xcode-select') def test__get_system_version(self): - self.assertTrue(platform.mac_ver()[0].startswith( - _osx_support._get_system_version())) + self.assertStartsWith(platform.mac_ver()[0], + _osx_support._get_system_version()) def test__remove_original_values(self): config_vars = { diff --git a/Lib/test/test_abstract_numbers.py b/Lib/test/test_abstract_numbers.py index 72232b670cdb89..cf071d2c933dd2 100644 --- a/Lib/test/test_abstract_numbers.py +++ b/Lib/test/test_abstract_numbers.py @@ -24,11 +24,11 @@ def not_implemented(*args, **kwargs): class TestNumbers(unittest.TestCase): def test_int(self): - self.assertTrue(issubclass(int, Integral)) - self.assertTrue(issubclass(int, Rational)) - self.assertTrue(issubclass(int, Real)) - self.assertTrue(issubclass(int, Complex)) - self.assertTrue(issubclass(int, Number)) + self.assertIsSubclass(int, Integral) + self.assertIsSubclass(int, Rational) + self.assertIsSubclass(int, Real) + self.assertIsSubclass(int, Complex) + self.assertIsSubclass(int, Number) self.assertEqual(7, int(7).real) self.assertEqual(0, int(7).imag) @@ -38,11 +38,11 @@ def test_int(self): self.assertEqual(1, int(7).denominator) def test_float(self): - self.assertFalse(issubclass(float, Integral)) - self.assertFalse(issubclass(float, Rational)) - self.assertTrue(issubclass(float, Real)) - self.assertTrue(issubclass(float, Complex)) - self.assertTrue(issubclass(float, Number)) + self.assertNotIsSubclass(float, Integral) + self.assertNotIsSubclass(float, Rational) + self.assertIsSubclass(float, Real) + self.assertIsSubclass(float, Complex) + self.assertIsSubclass(float, Number) self.assertEqual(7.3, float(7.3).real) self.assertEqual(0, float(7.3).imag) @@ -50,11 +50,11 @@ def test_float(self): self.assertEqual(-7.3, float(-7.3).conjugate()) def test_complex(self): - self.assertFalse(issubclass(complex, Integral)) - self.assertFalse(issubclass(complex, Rational)) - self.assertFalse(issubclass(complex, Real)) - self.assertTrue(issubclass(complex, Complex)) - self.assertTrue(issubclass(complex, Number)) + self.assertNotIsSubclass(complex, Integral) + self.assertNotIsSubclass(complex, Rational) + self.assertNotIsSubclass(complex, Real) + self.assertIsSubclass(complex, Complex) + self.assertIsSubclass(complex, Number) c1, c2 = complex(3, 2), complex(4,1) # XXX: This is not ideal, but see the comment in math_trunc(). diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 58853ba4eb3674..08ff41368d9bb0 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6805,7 +6805,7 @@ class TestImportStar(TestCase): def test(self): for name in argparse.__all__: - self.assertTrue(hasattr(argparse, name)) + self.assertHasAttr(argparse, name) def test_all_exports_everything_but_modules(self): items = [ diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 0776559b9003db..1479a8eafd62ec 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -275,12 +275,12 @@ def test_alias(self): self.assertEqual(alias.end_col_offset, 17) def test_base_classes(self): - self.assertTrue(issubclass(ast.For, ast.stmt)) - self.assertTrue(issubclass(ast.Name, ast.expr)) - self.assertTrue(issubclass(ast.stmt, ast.AST)) - self.assertTrue(issubclass(ast.expr, ast.AST)) - self.assertTrue(issubclass(ast.comprehension, ast.AST)) - self.assertTrue(issubclass(ast.Gt, ast.AST)) + self.assertIsSubclass(ast.For, ast.stmt) + self.assertIsSubclass(ast.Name, ast.expr) + self.assertIsSubclass(ast.stmt, ast.AST) + self.assertIsSubclass(ast.expr, ast.AST) + self.assertIsSubclass(ast.comprehension, ast.AST) + self.assertIsSubclass(ast.Gt, ast.AST) def test_field_attr_existence(self): for name, item in ast.__dict__.items(): @@ -1101,7 +1101,7 @@ def test_copy_with_parents(self): def test_replace_interface(self): for klass in self.iter_ast_classes(): with self.subTest(klass=klass): - self.assertTrue(hasattr(klass, '__replace__')) + self.assertHasAttr(klass, '__replace__') fields = set(klass._fields) with self.subTest(klass=klass, fields=fields): @@ -1330,7 +1330,7 @@ def test_replace_reject_known_custom_instance_fields_commits(self): context = node.ctx # explicit rejection of known instance fields - self.assertTrue(hasattr(node, 'extra')) + self.assertHasAttr(node, 'extra') msg = "Name.__replace__ got an unexpected keyword argument 'extra'." with self.assertRaisesRegex(TypeError, re.escape(msg)): copy.replace(node, extra=1) @@ -3071,7 +3071,7 @@ def test_FunctionDef(self): with self.assertWarnsRegex(DeprecationWarning, r"FunctionDef\.__init__ missing 1 required positional argument: 'name'"): node = ast.FunctionDef(args=args) - self.assertFalse(hasattr(node, "name")) + self.assertNotHasAttr(node, "name") self.assertEqual(node.decorator_list, []) node = ast.FunctionDef(name='foo', args=args) self.assertEqual(node.name, 'foo') diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py index 2b24b5d79275fa..5f9eb381f605d9 100644 --- a/Lib/test/test_audit.py +++ b/Lib/test/test_audit.py @@ -134,7 +134,7 @@ def test_socket(self): self.assertEqual(events[0][0], "socket.gethostname") self.assertEqual(events[1][0], "socket.__new__") self.assertEqual(events[2][0], "socket.bind") - self.assertTrue(events[2][2].endswith("('127.0.0.1', 8080)")) + self.assertEndsWith(events[2][2], "('127.0.0.1', 8080)") def test_gc(self): returncode, events, stderr = self.run_python("test_gc") diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 9efebc43d911c4..ce2e3e3726fcd0 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -812,7 +812,7 @@ def test_decode_nonascii_str(self): self.assertRaises(ValueError, f, 'with non-ascii \xcb') def test_ErrorHeritage(self): - self.assertTrue(issubclass(binascii.Error, ValueError)) + self.assertIsSubclass(binascii.Error, ValueError) def test_RFC4648_test_cases(self): # test cases from RFC 4648 section 10 diff --git a/Lib/test/test_baseexception.py b/Lib/test/test_baseexception.py index e599b02c17d9c0..12d4088842b119 100644 --- a/Lib/test/test_baseexception.py +++ b/Lib/test/test_baseexception.py @@ -10,13 +10,11 @@ class ExceptionClassTests(unittest.TestCase): inheritance hierarchy)""" def test_builtins_new_style(self): - self.assertTrue(issubclass(Exception, object)) + self.assertIsSubclass(Exception, object) def verify_instance_interface(self, ins): for attr in ("args", "__str__", "__repr__"): - self.assertTrue(hasattr(ins, attr), - "%s missing %s attribute" % - (ins.__class__.__name__, attr)) + self.assertHasAttr(ins, attr) def test_inheritance(self): # Make sure the inheritance hierarchy matches the documentation @@ -65,7 +63,7 @@ def test_inheritance(self): elif last_depth > depth: while superclasses[-1][0] >= depth: superclasses.pop() - self.assertTrue(issubclass(exc, superclasses[-1][1]), + self.assertIsSubclass(exc, superclasses[-1][1], "%s is not a subclass of %s" % (exc.__name__, superclasses[-1][1].__name__)) try: # Some exceptions require arguments; just skip them diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1f3b6746ce4a62..7ed7d7c47b6de1 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -38,13 +38,13 @@ def assertConversion(self, original, converted, restored, **kwargs): def test_exceptions(self): # Check module exceptions - self.assertTrue(issubclass(binascii.Error, Exception)) - self.assertTrue(issubclass(binascii.Incomplete, Exception)) + self.assertIsSubclass(binascii.Error, Exception) + self.assertIsSubclass(binascii.Incomplete, Exception) def test_functions(self): # Check presence of all functions for name in all_functions: - self.assertTrue(hasattr(getattr(binascii, name), '__call__')) + self.assertHasAttr(getattr(binascii, name), '__call__') self.assertRaises(TypeError, getattr(binascii, name)) def test_returned_value(self): diff --git a/Lib/test/test_binop.py b/Lib/test/test_binop.py index 299af09c4983df..b224c3d4e6078e 100644 --- a/Lib/test/test_binop.py +++ b/Lib/test/test_binop.py @@ -383,7 +383,7 @@ def test_comparison_orders(self): self.assertEqual(op_sequence(le, B, C), ['C.__ge__', 'B.__le__']) self.assertEqual(op_sequence(le, C, B), ['C.__le__', 'B.__ge__']) - self.assertTrue(issubclass(V, B)) + self.assertIsSubclass(V, B) self.assertEqual(op_sequence(eq, B, V), ['B.__eq__', 'V.__eq__']) self.assertEqual(op_sequence(le, B, V), ['B.__le__', 'V.__ge__']) diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index 61921e93e85e63..19582e757161fc 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -2879,11 +2879,11 @@ def test_memoryview_tolist(self): def test_memoryview_repr(self): m = memoryview(bytearray(9)) r = m.__repr__() - self.assertTrue(r.startswith("Calendar for 2004', output) def test_html_output_current_year(self): diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 185ae84dc4d19f..1c73aaafb71fd5 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -695,8 +695,8 @@ class DerivedType(SuperType): UnaffectedType2 = _testcapi.make_vectorcall_class(SuperType) # Aside: Quickly check that the C helper actually made derived types - self.assertTrue(issubclass(UnaffectedType1, DerivedType)) - self.assertTrue(issubclass(UnaffectedType2, SuperType)) + self.assertIsSubclass(UnaffectedType1, DerivedType) + self.assertIsSubclass(UnaffectedType2, SuperType) # Initial state: tp_call self.assertEqual(instance(), "tp_call") diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index f7fc3b3873335f..580d54e0eb094d 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -238,11 +238,11 @@ def test_directive_output_print(self): # The generated output will differ for every run, but we can check that # it starts with the clinic block, we check that it contains all the # expected fields, and we check that it contains the checksum line. - self.assertTrue(out.startswith(dedent(""" + self.assertStartsWith(out, dedent(""" /*[clinic input] output print 'I told you once.' [clinic start generated code]*/ - """))) + """)) fields = { "cpp_endif", "cpp_if", @@ -259,9 +259,7 @@ def test_directive_output_print(self): with self.subTest(field=field): self.assertIn(field, out) last_line = out.rstrip().split("\n")[-1] - self.assertTrue( - last_line.startswith("/*[clinic end generated code: output=") - ) + self.assertStartsWith(last_line, "/*[clinic end generated code: output=") def test_directive_wrong_arg_number(self): raw = dedent(""" @@ -2705,8 +2703,7 @@ def test_cli_force(self): # Note, we cannot check the entire fail msg, because the path to # the tmp file will change for every run. _, err = self.expect_failure(fn) - self.assertTrue(err.endswith(fail_msg), - f"{err!r} does not end with {fail_msg!r}") + self.assertEndsWith(err, fail_msg) # Then, force regeneration; success expected. out = self.expect_success("-f", fn) self.assertEqual(out, "") @@ -2717,8 +2714,7 @@ def test_cli_force(self): ) with open(fn, encoding='utf-8') as f: generated = f.read() - self.assertTrue(generated.endswith(checksum), - (generated, checksum)) + self.assertEndsWith(generated, checksum) def test_cli_make(self): c_code = dedent(""" @@ -2867,8 +2863,8 @@ def test_cli_converters(self): # param may change (it's a set, thus unordered). So, let's compare the # start and end of the expected output, and then assert that the # converters appear lined up in alphabetical order. - self.assertTrue(out.startswith(prelude), out) - self.assertTrue(out.endswith(finale), out) + self.assertStartsWith(out, prelude) + self.assertEndsWith(out, finale) out = out.removeprefix(prelude) out = out.removesuffix(finale) @@ -2876,10 +2872,7 @@ def test_cli_converters(self): for converter, line in zip(expected_converters, lines): line = line.lstrip() with self.subTest(converter=converter): - self.assertTrue( - line.startswith(converter), - f"expected converter {converter!r}, got {line!r}" - ) + self.assertStartsWith(line, converter) def test_cli_fail_converters_and_filename(self): _, err = self.expect_failure("--converters", "test.c") diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index f540973c11e01c..c17d749d4a17ed 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -39,7 +39,8 @@ def test_directories(self): def verify_valid_flag(self, cmd_line): rc, out, err = assert_python_ok(cmd_line) - self.assertTrue(out == b'' or out.endswith(b'\n')) + if out != b'': + self.assertEndsWith(out, b'\n') self.assertNotIn(b'Traceback', out) self.assertNotIn(b'Traceback', err) return out @@ -89,8 +90,8 @@ def test_version(self): version = ('Python %d.%d' % sys.version_info[:2]).encode("ascii") for switch in '-V', '--version', '-VV': rc, out, err = assert_python_ok(switch) - self.assertFalse(err.startswith(version)) - self.assertTrue(out.startswith(version)) + self.assertNotStartsWith(err, version) + self.assertStartsWith(out, version) def test_verbose(self): # -v causes imports to write to stderr. If the write to @@ -380,7 +381,7 @@ def test_unbuffered_input(self): p.stdin.flush() data, rc = _kill_python_and_exit_code(p) self.assertEqual(rc, 0) - self.assertTrue(data.startswith(b'x'), data) + self.assertStartsWith(data, b'x') def test_large_PYTHONPATH(self): path1 = "ABCDE" * 100 @@ -1039,7 +1040,7 @@ def test_parsing_error(self): stderr=subprocess.PIPE, text=True) err_msg = "Unknown option: --unknown-option\nusage: " - self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) + self.assertStartsWith(proc.stderr, err_msg) self.assertNotEqual(proc.returncode, 0) def test_int_max_str_digits(self): diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 53dc9b1a7effb5..784c45aa96f8a7 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -553,9 +553,9 @@ def test_pep_409_verbiage(self): exitcode, stdout, stderr = assert_python_failure(script_name) text = stderr.decode('ascii').split('\n') self.assertEqual(len(text), 5) - self.assertTrue(text[0].startswith('Traceback')) - self.assertTrue(text[1].startswith(' File ')) - self.assertTrue(text[3].startswith('NameError')) + self.assertStartsWith(text[0], 'Traceback') + self.assertStartsWith(text[1], ' File ') + self.assertStartsWith(text[3], 'NameError') def test_non_ascii(self): # Apple platforms deny the creation of a file with an invalid UTF-8 name. @@ -708,9 +708,8 @@ def test_syntaxerror_does_not_crash(self): exitcode, stdout, stderr = assert_python_failure(script_name) text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read() # It used to crash in https://github.com/python/cpython/issues/111132 - self.assertTrue(text.endswith( - 'SyntaxError: nonlocal declaration not allowed at module level\n', - ), text) + self.assertEndsWith(text, + 'SyntaxError: nonlocal declaration not allowed at module level\n') def test_consistent_sys_path_for_direct_execution(self): # This test case ensures that the following all give the same diff --git a/Lib/test/test_code_module.py b/Lib/test/test_code_module.py index 57fb130070b34e..3642b47c2c1f03 100644 --- a/Lib/test/test_code_module.py +++ b/Lib/test/test_code_module.py @@ -133,7 +133,7 @@ def test_unicode_error(self): output = ''.join(''.join(call[1]) for call in self.stderr.method_calls) output = output[output.index('(InteractiveConsole)'):] output = output[output.index('\n') + 1:] - self.assertTrue(output.startswith('UnicodeEncodeError: '), output) + self.assertStartsWith(output, 'UnicodeEncodeError: ') self.assertIs(self.sysmod.last_type, UnicodeEncodeError) self.assertIs(type(self.sysmod.last_value), UnicodeEncodeError) self.assertIsNone(self.sysmod.last_traceback) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 1d6136405f428c..8c9a0972492294 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3802,7 +3802,7 @@ def check_decode_strings(self, errors): with self.assertRaises(RuntimeError) as cm: self.decode(encoded, errors) errmsg = str(cm.exception) - self.assertTrue(errmsg.startswith("decode error: "), errmsg) + self.assertStartsWith(errmsg, "decode error: ") else: decoded = self.decode(encoded, errors) self.assertEqual(decoded, expected) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index a580a240d9f474..8384c183dd92dd 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -316,7 +316,7 @@ def _test_ddir_only(self, *, ddir, parallel=True): self.assertTrue(mods) for mod in mods: - self.assertTrue(mod.startswith(self.directory), mod) + self.assertStartsWith(mod, self.directory) modcode = importlib.util.cache_from_source(mod) modpath = mod[len(self.directory+os.sep):] _, _, err = script_helper.assert_python_failure(modcode) diff --git a/Lib/test/test_compiler_assemble.py b/Lib/test/test_compiler_assemble.py index c4962e3599986e..99a11e99d56485 100644 --- a/Lib/test/test_compiler_assemble.py +++ b/Lib/test/test_compiler_assemble.py @@ -146,4 +146,4 @@ def test_exception_table(self): L1 to L2 -> L2 [0] L2 to L3 -> L3 [1] lasti """) - self.assertTrue(output.getvalue().endswith(exc_table)) + self.assertEndsWith(output.getvalue(), exc_table) diff --git a/Lib/test/test_contextlib.py b/Lib/test/test_contextlib.py index cf6519598037e9..6a3329fa5aaace 100644 --- a/Lib/test/test_contextlib.py +++ b/Lib/test/test_contextlib.py @@ -48,23 +48,23 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): return None - self.assertTrue(issubclass(ManagerFromScratch, AbstractContextManager)) + self.assertIsSubclass(ManagerFromScratch, AbstractContextManager) class DefaultEnter(AbstractContextManager): def __exit__(self, *args): super().__exit__(*args) - self.assertTrue(issubclass(DefaultEnter, AbstractContextManager)) + self.assertIsSubclass(DefaultEnter, AbstractContextManager) class NoEnter(ManagerFromScratch): __enter__ = None - self.assertFalse(issubclass(NoEnter, AbstractContextManager)) + self.assertNotIsSubclass(NoEnter, AbstractContextManager) class NoExit(ManagerFromScratch): __exit__ = None - self.assertFalse(issubclass(NoExit, AbstractContextManager)) + self.assertNotIsSubclass(NoExit, AbstractContextManager) class ContextManagerTestCase(unittest.TestCase): diff --git a/Lib/test/test_contextlib_async.py b/Lib/test/test_contextlib_async.py index 7750186e56a5cc..dcd0072037950e 100644 --- a/Lib/test/test_contextlib_async.py +++ b/Lib/test/test_contextlib_async.py @@ -77,23 +77,23 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_value, traceback): return None - self.assertTrue(issubclass(ManagerFromScratch, AbstractAsyncContextManager)) + self.assertIsSubclass(ManagerFromScratch, AbstractAsyncContextManager) class DefaultEnter(AbstractAsyncContextManager): async def __aexit__(self, *args): await super().__aexit__(*args) - self.assertTrue(issubclass(DefaultEnter, AbstractAsyncContextManager)) + self.assertIsSubclass(DefaultEnter, AbstractAsyncContextManager) class NoneAenter(ManagerFromScratch): __aenter__ = None - self.assertFalse(issubclass(NoneAenter, AbstractAsyncContextManager)) + self.assertNotIsSubclass(NoneAenter, AbstractAsyncContextManager) class NoneAexit(ManagerFromScratch): __aexit__ = None - self.assertFalse(issubclass(NoneAexit, AbstractAsyncContextManager)) + self.assertNotIsSubclass(NoneAexit, AbstractAsyncContextManager) class AsyncContextManagerTestCase(unittest.TestCase): diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index d76341417e9bef..3cb8072abefe98 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -19,7 +19,7 @@ class TestCopy(unittest.TestCase): def test_exceptions(self): self.assertIs(copy.Error, copy.error) - self.assertTrue(issubclass(copy.Error, Exception)) + self.assertIsSubclass(copy.Error, Exception) # The copy() method diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 761cb230277bd9..4755046fe1904d 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -527,7 +527,7 @@ class CoroutineTest(unittest.TestCase): def test_gen_1(self): def gen(): yield - self.assertFalse(hasattr(gen, '__await__')) + self.assertNotHasAttr(gen, '__await__') def test_func_1(self): async def foo(): diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py index feca82681deb7c..d5ca7f2ca1ae65 100644 --- a/Lib/test/test_curses.py +++ b/Lib/test/test_curses.py @@ -1260,7 +1260,7 @@ class TestAscii(unittest.TestCase): def test_controlnames(self): for name in curses.ascii.controlnames: - self.assertTrue(hasattr(curses.ascii, name), name) + self.assertHasAttr(curses.ascii, name) def test_ctypes(self): def check(func, expected): diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index ac78f8327b808e..e98a8f284cec9f 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -120,7 +120,7 @@ class Some: pass for param in inspect.signature(dataclass).parameters: if param == 'cls': continue - self.assertTrue(hasattr(Some.__dataclass_params__, param), msg=param) + self.assertHasAttr(Some.__dataclass_params__, param) def test_named_init_params(self): @dataclass @@ -671,7 +671,7 @@ class C: self.assertEqual(the_fields[0].name, 'x') self.assertEqual(the_fields[0].type, int) - self.assertFalse(hasattr(C, 'x')) + self.assertNotHasAttr(C, 'x') self.assertTrue (the_fields[0].init) self.assertTrue (the_fields[0].repr) self.assertEqual(the_fields[1].name, 'y') @@ -681,7 +681,7 @@ class C: self.assertTrue (the_fields[1].repr) self.assertEqual(the_fields[2].name, 'z') self.assertEqual(the_fields[2].type, str) - self.assertFalse(hasattr(C, 'z')) + self.assertNotHasAttr(C, 'z') self.assertTrue (the_fields[2].init) self.assertFalse(the_fields[2].repr) @@ -732,8 +732,8 @@ class C: z: object = default t: int = field(default=100) - self.assertFalse(hasattr(C, 'x')) - self.assertFalse(hasattr(C, 'y')) + self.assertNotHasAttr(C, 'x') + self.assertNotHasAttr(C, 'y') self.assertIs (C.z, default) self.assertEqual(C.t, 100) @@ -2912,10 +2912,10 @@ class C: pass c = C() - self.assertFalse(hasattr(c, 'i')) + self.assertNotHasAttr(c, 'i') with self.assertRaises(FrozenInstanceError): c.i = 5 - self.assertFalse(hasattr(c, 'i')) + self.assertNotHasAttr(c, 'i') with self.assertRaises(FrozenInstanceError): del c.i @@ -3144,7 +3144,7 @@ class S(D): del s.y self.assertEqual(s.y, 10) del s.cached - self.assertFalse(hasattr(s, 'cached')) + self.assertNotHasAttr(s, 'cached') with self.assertRaises(AttributeError) as cm: del s.cached self.assertNotIsInstance(cm.exception, FrozenInstanceError) @@ -3158,12 +3158,12 @@ class S(D): pass s = S() - self.assertFalse(hasattr(s, 'x')) + self.assertNotHasAttr(s, 'x') s.x = 5 self.assertEqual(s.x, 5) del s.x - self.assertFalse(hasattr(s, 'x')) + self.assertNotHasAttr(s, 'x') with self.assertRaises(AttributeError) as cm: del s.x self.assertNotIsInstance(cm.exception, FrozenInstanceError) @@ -3393,8 +3393,8 @@ class A: B = dataclass(A, slots=True) self.assertIsNot(A, B) - self.assertFalse(hasattr(A, "__slots__")) - self.assertTrue(hasattr(B, "__slots__")) + self.assertNotHasAttr(A, "__slots__") + self.assertHasAttr(B, "__slots__") # Can't be local to test_frozen_pickle. @dataclass(frozen=True, slots=True) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 4be7c5649da68a..a10922a403ec38 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -66,7 +66,7 @@ def keys_helper(self, f): return keys def test_error(self): - self.assertTrue(issubclass(self.module.error, OSError)) + self.assertIsSubclass(self.module.error, OSError) def test_anydbm_not_existing(self): self.assertRaises(dbm.error, dbm.open, _fname) diff --git a/Lib/test/test_dbm_sqlite3.py b/Lib/test/test_dbm_sqlite3.py index 2e1f2d32924bad..9216da8a63f957 100644 --- a/Lib/test/test_dbm_sqlite3.py +++ b/Lib/test/test_dbm_sqlite3.py @@ -36,7 +36,7 @@ def test_uri_substitutions(self): ) for path, normalized in dataset: with self.subTest(path=path, normalized=normalized): - self.assertTrue(_normalize_uri(path).endswith(normalized)) + self.assertEndsWith(_normalize_uri(path), normalized) @unittest.skipUnless(sys.platform == "win32", "requires Windows") def test_uri_windows(self): @@ -55,7 +55,7 @@ def test_uri_windows(self): with self.subTest(path=path, normalized=normalized): if not Path(path).is_absolute(): self.skipTest(f"skipping relative path: {path!r}") - self.assertTrue(_normalize_uri(path).endswith(normalized)) + self.assertEndsWith(_normalize_uri(path), normalized) class ReadOnly(_SQLiteDbmTests): diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py index 4679f297fd7f4a..4e1a489205a685 100644 --- a/Lib/test/test_deque.py +++ b/Lib/test/test_deque.py @@ -838,7 +838,7 @@ def test_copy_pickle(self): self.assertEqual(list(d), list(e)) self.assertEqual(e.x, d.x) self.assertEqual(e.z, d.z) - self.assertFalse(hasattr(e, 'y')) + self.assertNotHasAttr(e, 'y') def test_pickle_recursive(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 14026531e22333..d635855f537af0 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -409,7 +409,7 @@ class ClassPropertiesAndMethods(unittest.TestCase): def test_python_dicts(self): # Testing Python subclass of dict... - self.assertTrue(issubclass(dict, dict)) + self.assertIsSubclass(dict, dict) self.assertIsInstance({}, dict) d = dict() self.assertEqual(d, {}) @@ -433,7 +433,7 @@ def setstate(self, state): self.state = state def getstate(self): return self.state - self.assertTrue(issubclass(C, dict)) + self.assertIsSubclass(C, dict) a1 = C(12) self.assertEqual(a1.state, 12) a2 = C(foo=1, bar=2) @@ -1048,15 +1048,15 @@ class SubType(types.ModuleType): m = types.ModuleType("m") self.assertTrue(m.__class__ is types.ModuleType) - self.assertFalse(hasattr(m, "a")) + self.assertNotHasAttr(m, "a") m.__class__ = SubType self.assertTrue(m.__class__ is SubType) - self.assertTrue(hasattr(m, "a")) + self.assertHasAttr(m, "a") m.__class__ = types.ModuleType self.assertTrue(m.__class__ is types.ModuleType) - self.assertFalse(hasattr(m, "a")) + self.assertNotHasAttr(m, "a") # Make sure that builtin immutable objects don't support __class__ # assignment, because the object instances may be interned. @@ -1780,7 +1780,7 @@ class D(C): class E: # *not* subclassing from C foo = C.foo self.assertEqual(E().foo.__func__, C.foo) # i.e., unbound - self.assertTrue(repr(C.foo.__get__(C())).startswith("", report) else: self.assertIn("test message", report) - self.assertTrue(report.endswith("\n")) + self.assertEndsWith(report, "\n") @cpython_only # Python built with Py_TRACE_REFS fail with a fatal error in diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index b340ef7ed1621c..6524baabe7f96f 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -245,7 +245,7 @@ def test_detached_stdin_binary_mode(self): orig_stdin = sys.stdin try: sys.stdin = BytesIO(b'spam, bacon, sausage, and spam') - self.assertFalse(hasattr(sys.stdin, 'buffer')) + self.assertNotHasAttr(sys.stdin, 'buffer') fi = FileInput(files=['-'], mode='rb') lines = list(fi) self.assertEqual(lines, [b'spam, bacon, sausage, and spam']) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 95c98c6ac63f71..b4cbfb6d774080 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -914,7 +914,7 @@ def __del__(self): gc.collect() self.assertEqual(len(Lazarus.resurrected_instances), 1) instance = Lazarus.resurrected_instances.pop() - self.assertTrue(hasattr(instance, "cargo")) + self.assertHasAttr(instance, "cargo") self.assertEqual(id(instance.cargo), cargo_id) gc.collect() diff --git a/Lib/test/test_genericalias.py b/Lib/test/test_genericalias.py index ea0dc241e39475..7601cb00ff6685 100644 --- a/Lib/test/test_genericalias.py +++ b/Lib/test/test_genericalias.py @@ -236,13 +236,13 @@ class MyGeneric: self.assertEqual(repr(x2), 'tuple[*tuple[int, str]]') x3 = tuple[*tuple[int, ...]] self.assertEqual(repr(x3), 'tuple[*tuple[int, ...]]') - self.assertTrue(repr(MyList[int]).endswith('.BaseTest.test_repr..MyList[int]')) + self.assertEndsWith(repr(MyList[int]), '.BaseTest.test_repr..MyList[int]') self.assertEqual(repr(list[str]()), '[]') # instances should keep their normal repr # gh-105488 - self.assertTrue(repr(MyGeneric[int]).endswith('MyGeneric[int]')) - self.assertTrue(repr(MyGeneric[[]]).endswith('MyGeneric[[]]')) - self.assertTrue(repr(MyGeneric[[int, str]]).endswith('MyGeneric[[int, str]]')) + self.assertEndsWith(repr(MyGeneric[int]), 'MyGeneric[int]') + self.assertEndsWith(repr(MyGeneric[[]]), 'MyGeneric[[]]') + self.assertEndsWith(repr(MyGeneric[[int, str]]), 'MyGeneric[[int, str]]') def test_exposed_type(self): import types @@ -362,7 +362,7 @@ def test_isinstance(self): def test_issubclass(self): class L(list): ... - self.assertTrue(issubclass(L, list)) + self.assertIsSubclass(L, list) with self.assertRaises(TypeError): issubclass(L, list[str]) diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index 6c3abe602f557c..df07af01fc7540 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -92,8 +92,8 @@ def test_commonprefix(self): for s1 in testlist: for s2 in testlist: p = commonprefix([s1, s2]) - self.assertTrue(s1.startswith(p)) - self.assertTrue(s2.startswith(p)) + self.assertStartsWith(s1, p) + self.assertStartsWith(s2, p) if s1 != s2: n = len(p) self.assertNotEqual(s1[n:n+1], s2[n:n+1]) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index fa5de7c190e6a3..ccbacc7c19b6e6 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -331,13 +331,13 @@ def test_mode(self): def test_1647484(self): for mode in ('wb', 'rb'): with gzip.GzipFile(self.filename, mode) as f: - self.assertTrue(hasattr(f, "name")) + self.assertHasAttr(f, "name") self.assertEqual(f.name, self.filename) def test_paddedfile_getattr(self): self.test_write() with gzip.GzipFile(self.filename, 'rb') as f: - self.assertTrue(hasattr(f.fileobj, "name")) + self.assertHasAttr(f.fileobj, "name") self.assertEqual(f.fileobj.name, self.filename) def test_mtime(self): @@ -345,7 +345,7 @@ def test_mtime(self): with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: fWrite.write(data1) with gzip.GzipFile(self.filename) as fRead: - self.assertTrue(hasattr(fRead, 'mtime')) + self.assertHasAttr(fRead, 'mtime') self.assertIsNone(fRead.mtime) dataRead = fRead.read() self.assertEqual(dataRead, data1) @@ -460,7 +460,7 @@ def test_zero_padded_file(self): self.assertEqual(d, data1 * 50, "Incorrect data in file") def test_gzip_BadGzipFile_exception(self): - self.assertTrue(issubclass(gzip.BadGzipFile, OSError)) + self.assertIsSubclass(gzip.BadGzipFile, OSError) def test_bad_gzip_file(self): with open(self.filename, 'wb') as file: diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 5e3356a02f31b6..de4c8a1670f591 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -152,8 +152,8 @@ def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm, **kwargs): if _hashlib: # These algorithms should always be present when this module # is compiled. If not, something was compiled wrong. - self.assertTrue(hasattr(_hashlib, 'openssl_md5')) - self.assertTrue(hasattr(_hashlib, 'openssl_sha1')) + self.assertHasAttr(_hashlib, 'openssl_md5') + self.assertHasAttr(_hashlib, 'openssl_sha1') for algorithm, constructors in self.constructors_to_test.items(): constructor = getattr(_hashlib, 'openssl_'+algorithm, None) if constructor: diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 1c325c0d5075a2..e584fb417b9d54 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -786,12 +786,12 @@ def test_getfile(self): def test_getfile_builtin_module(self): with self.assertRaises(TypeError) as e: inspect.getfile(sys) - self.assertTrue(str(e.exception).startswith(':5: EncodingWarning: ")) - self.assertTrue( - warnings[1].startswith(b":8: EncodingWarning: ")) + self.assertStartsWith(warnings[0], b":5: EncodingWarning: ") + self.assertStartsWith(warnings[1], b":8: EncodingWarning: ") def test_text_encoding(self): # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8" @@ -4834,7 +4832,7 @@ def on_alarm(*args): os.read(r, len(data) * 100) exc = cm.exception if isinstance(exc, RuntimeError): - self.assertTrue(str(exc).startswith("reentrant call"), str(exc)) + self.assertStartsWith(str(exc), "reentrant call") finally: signal.alarm(0) wio.close() diff --git a/Lib/test/test_json/test_fail.py b/Lib/test/test_json/test_fail.py index 7c1696cc66d12b..79c44af2fbf0e1 100644 --- a/Lib/test/test_json/test_fail.py +++ b/Lib/test/test_json/test_fail.py @@ -102,7 +102,7 @@ def test_not_serializable(self): with self.assertRaisesRegex(TypeError, 'Object of type module is not JSON serializable') as cm: self.dumps(sys) - self.assertFalse(hasattr(cm.exception, '__notes__')) + self.assertNotHasAttr(cm.exception, '__notes__') with self.assertRaises(TypeError) as cm: self.dumps([1, [2, 3, sys]]) diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index 72cde3f0d6c1bd..9ea2679c77ec17 100644 --- a/Lib/test/test_json/test_tool.py +++ b/Lib/test/test_json/test_tool.py @@ -160,7 +160,7 @@ def test_help_flag(self): rc, out, err = assert_python_ok('-m', self.module, '-h', PYTHON_COLORS='0') self.assertEqual(rc, 0) - self.assertTrue(out.startswith(b'usage: ')) + self.assertStartsWith(out, b'usage: ') self.assertEqual(err, b'') def test_sort_keys_flag(self): diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 173fc743cf68ae..caa1603c78eb01 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -443,7 +443,7 @@ def test_search_major_3(self): except subprocess.CalledProcessError: raise unittest.SkipTest("requires at least one Python 3.x install") self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("3."), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "3.") def test_search_major_3_32(self): try: @@ -453,8 +453,8 @@ def test_search_major_3_32(self): raise unittest.SkipTest("requires at least one 32-bit Python 3.x install") raise self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("3."), data["env.tag"]) - self.assertTrue(data["env.tag"].endswith("-32"), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "3.") + self.assertEndsWith(data["env.tag"], "-32") def test_search_major_2(self): try: @@ -463,7 +463,7 @@ def test_search_major_2(self): if not is_installed("2.7"): raise unittest.SkipTest("requires at least one Python 2.x install") self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("2."), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "2.") def test_py_default(self): with self.py_ini(TEST_PY_DEFAULTS): diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index 9ffb93e797dd80..e93c3c37354e27 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1025,12 +1025,12 @@ def test_peek(self): with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: result = f.peek() self.assertGreater(len(result), 0) - self.assertTrue(INPUT.startswith(result)) + self.assertStartsWith(INPUT, result) self.assertEqual(f.read(), INPUT) with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: result = f.peek(10) self.assertGreater(len(result), 0) - self.assertTrue(INPUT.startswith(result)) + self.assertStartsWith(INPUT, result) self.assertEqual(f.read(), INPUT) def test_peek_bad_args(self): diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 95629ed862d6eb..63998a86c45b53 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -265,8 +265,8 @@ def test_iterator(self): memio = self.ioclass(buf * 10) self.assertEqual(iter(memio), memio) - self.assertTrue(hasattr(memio, '__iter__')) - self.assertTrue(hasattr(memio, '__next__')) + self.assertHasAttr(memio, '__iter__') + self.assertHasAttr(memio, '__next__') i = 0 for line in memio: self.assertEqual(line, buf) diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 9f131a9110dccb..4204a6a47d2a81 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -147,7 +147,7 @@ def test_fromkeys(self): def test_abc(self): OrderedDict = self.OrderedDict self.assertIsInstance(OrderedDict(), MutableMapping) - self.assertTrue(issubclass(OrderedDict, MutableMapping)) + self.assertIsSubclass(OrderedDict, MutableMapping) def test_clear(self): OrderedDict = self.OrderedDict @@ -314,14 +314,14 @@ def check(dup): check(dup) self.assertIs(dup.x, od.x) self.assertIs(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') dup = copy.deepcopy(od) check(dup) self.assertEqual(dup.x, od.x) self.assertIsNot(dup.x, od.x) self.assertEqual(dup.z, od.z) self.assertIsNot(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') # pickle directly pulls the module, so we have to fake it with replaced_module('collections', self.module): for proto in range(pickle.HIGHEST_PROTOCOL + 1): @@ -330,7 +330,7 @@ def check(dup): check(dup) self.assertEqual(dup.x, od.x) self.assertEqual(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') check(eval(repr(od))) update_test = OrderedDict() update_test.update(od) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 333179a71e3cdc..88b5b0e6e358bb 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -818,7 +818,7 @@ def test_15261(self): self.assertEqual(ctx.exception.errno, errno.EBADF) def check_file_attributes(self, result): - self.assertTrue(hasattr(result, 'st_file_attributes')) + self.assertHasAttr(result, 'st_file_attributes') self.assertTrue(isinstance(result.st_file_attributes, int)) self.assertTrue(0 <= result.st_file_attributes <= 0xFFFFFFFF) @@ -2181,7 +2181,7 @@ def test_getrandom0(self): self.assertEqual(empty, b'') def test_getrandom_random(self): - self.assertTrue(hasattr(os, 'GRND_RANDOM')) + self.assertHasAttr(os, 'GRND_RANDOM') # Don't test os.getrandom(1, os.GRND_RANDOM) to not consume the rare # resource /dev/random @@ -5431,8 +5431,8 @@ def test_fsencode_fsdecode(self): def test_pathlike(self): self.assertEqual('#feelthegil', self.fspath(FakePath('#feelthegil'))) - self.assertTrue(issubclass(FakePath, os.PathLike)) - self.assertTrue(isinstance(FakePath('x'), os.PathLike)) + self.assertIsSubclass(FakePath, os.PathLike) + self.assertIsInstance(FakePath('x'), os.PathLike) def test_garbage_in_exception_out(self): vapor = type('blah', (), {}) @@ -5458,8 +5458,8 @@ def test_pathlike_subclasshook(self): # true on abstract implementation. class A(os.PathLike): pass - self.assertFalse(issubclass(FakePath, A)) - self.assertTrue(issubclass(FakePath, os.PathLike)) + self.assertNotIsSubclass(FakePath, A) + self.assertIsSubclass(FakePath, os.PathLike) def test_pathlike_class_getitem(self): self.assertIsInstance(os.PathLike[bytes], types.GenericAlias) @@ -5469,7 +5469,7 @@ class A(os.PathLike): __slots__ = () def __fspath__(self): return '' - self.assertFalse(hasattr(A(), '__dict__')) + self.assertNotHasAttr(A(), '__dict__') def test_fspath_set_to_None(self): class Foo: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 37ef9fa1946376..13356b4cfe082b 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -77,8 +77,8 @@ def needs_symlinks(fn): class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): - self.assertTrue(issubclass(pathlib.UnsupportedOperation, NotImplementedError)) - self.assertTrue(isinstance(pathlib.UnsupportedOperation(), NotImplementedError)) + self.assertIsSubclass(pathlib.UnsupportedOperation, NotImplementedError) + self.assertIsInstance(pathlib.UnsupportedOperation(), NotImplementedError) class LazyImportTest(unittest.TestCase): @@ -300,8 +300,8 @@ def test_repr_common(self): clsname = p.__class__.__name__ r = repr(p) # The repr() is in the form ClassName("forward-slashes path"). - self.assertTrue(r.startswith(clsname + '('), r) - self.assertTrue(r.endswith(')'), r) + self.assertStartsWith(r, clsname + '(') + self.assertEndsWith(r, ')') inner = r[len(clsname) + 1 : -1] self.assertEqual(eval(inner), p.as_posix()) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 47f51f1979faab..0a9ba578673b39 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -316,7 +316,7 @@ def negzero(): return -(1.0-1.0) for instr in dis.get_instructions(negzero): - self.assertFalse(instr.opname.startswith('UNARY_')) + self.assertNotStartsWith(instr.opname, 'UNARY_') self.check_lnotab(negzero) def test_constant_folding_binop(self): diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py index 1095e7303c188f..aa01a9b8f7ed87 100644 --- a/Lib/test/test_peg_generator/test_c_parser.py +++ b/Lib/test/test_peg_generator/test_c_parser.py @@ -387,10 +387,10 @@ def test_with_stmt_with_paren(self) -> None: test_source = """ stmt = "with (\\n a as b,\\n c as d\\n): pass" the_ast = parse.parse_string(stmt, mode=1) - self.assertTrue(ast_dump(the_ast).startswith( + self.assertStartsWith(ast_dump(the_ast), "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), " "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]" - )) + ) """ self.run_test(grammar_source, test_source) diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index d8606521345390..d912c55812397d 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -91,10 +91,8 @@ def test_gather(self) -> None: """ rules = parse_string(grammar, GrammarParser).rules self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE") - self.assertTrue( - repr(rules["start"]).startswith( - "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" - ) + self.assertStartsWith(repr(rules["start"]), + "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" ) self.assertEqual(str(rules["thing"]), "thing: NUMBER") parser_class = make_parser(grammar) diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index c176e505155b90..21d097dbb559ec 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -93,9 +93,7 @@ def baz(): perf_line, f"Could not find {expected_symbol} in perf file" ) perf_addr = perf_line.split(" ")[0] - self.assertFalse( - perf_addr.startswith("0x"), "Address should not be prefixed with 0x" - ) + self.assertNotStartsWith(perf_addr, "0x") self.assertTrue( set(perf_addr).issubset(string.hexdigits), "Address should contain only hex characters", diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 742ca8de1bea8c..e2384b33345a45 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -611,10 +611,10 @@ def test_name_mapping(self): with self.subTest(((module3, name3), (module2, name2))): if (module2, name2) == ('exceptions', 'OSError'): attr = getattribute(module3, name3) - self.assertTrue(issubclass(attr, OSError)) + self.assertIsSubclass(attr, OSError) elif (module2, name2) == ('exceptions', 'ImportError'): attr = getattribute(module3, name3) - self.assertTrue(issubclass(attr, ImportError)) + self.assertIsSubclass(attr, ImportError) else: module, name = mapping(module2, name2) if module3[:1] != '_': diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 3b673a47c8c137..3688cc4267b6b2 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -401,7 +401,7 @@ def test_win32_ver(self): for v in version.split('.'): int(v) # should not fail if csd: - self.assertTrue(csd.startswith('SP'), msg=csd) + self.assertStartsWith(csd, 'SP') if ptype: if os.cpu_count() > 1: self.assertIn('Multiprocessor', ptype) diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 0817d0a87a38b1..628920e34b586f 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -1107,7 +1107,7 @@ def test_lchmod_dir_symlink(self): def _test_chflags_regular_file(self, chflags_func, target_file, **kwargs): st = os.stat(target_file) - self.assertTrue(hasattr(st, 'st_flags')) + self.assertHasAttr(st, 'st_flags') # ZFS returns EOPNOTSUPP when attempting to set flag UF_IMMUTABLE. flags = st.st_flags | stat.UF_IMMUTABLE @@ -1143,7 +1143,7 @@ def test_lchflags_regular_file(self): def test_lchflags_symlink(self): testfn_st = os.stat(os_helper.TESTFN) - self.assertTrue(hasattr(testfn_st, 'st_flags')) + self.assertHasAttr(testfn_st, 'st_flags') self.addCleanup(os_helper.unlink, _DUMMY_SYMLINK) os.symlink(os_helper.TESTFN, _DUMMY_SYMLINK) @@ -2218,12 +2218,12 @@ def _verify_available(self, name): def test_pwritev(self): self._verify_available("HAVE_PWRITEV") if self.mac_ver >= (10, 16): - self.assertTrue(hasattr(os, "pwritev"), "os.pwritev is not available") - self.assertTrue(hasattr(os, "preadv"), "os.readv is not available") + self.assertHasAttr(os, "pwritev") + self.assertHasAttr(os, "preadv") else: - self.assertFalse(hasattr(os, "pwritev"), "os.pwritev is available") - self.assertFalse(hasattr(os, "preadv"), "os.readv is available") + self.assertNotHasAttr(os, "pwritev") + self.assertNotHasAttr(os, "preadv") def test_stat(self): self._verify_available("HAVE_FSTATAT") diff --git a/Lib/test/test_property.py b/Lib/test/test_property.py index cea241b0f200d0..26aefdbf0421dd 100644 --- a/Lib/test/test_property.py +++ b/Lib/test/test_property.py @@ -87,8 +87,8 @@ def test_property_decorator_baseclass(self): self.assertEqual(base.spam, 10) self.assertEqual(base._spam, 10) delattr(base, "spam") - self.assertTrue(not hasattr(base, "spam")) - self.assertTrue(not hasattr(base, "_spam")) + self.assertNotHasAttr(base, "spam") + self.assertNotHasAttr(base, "_spam") base.spam = 20 self.assertEqual(base.spam, 20) self.assertEqual(base._spam, 20) diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 6dc51e4371d0f6..3c8ed251acaa4d 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -46,7 +46,7 @@ def test_parse_semantics(self): items = pulldom.parseString(SMALL_SAMPLE) evt, node = next(items) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") self.assertEqual(pulldom.START_DOCUMENT, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) @@ -192,7 +192,7 @@ def _test_thorough(self, pd, before_root=True): evt, node = next(pd) self.assertEqual(pulldom.START_DOCUMENT, evt) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") if before_root: evt, node = next(pd) diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index df05cd07d7e249..3e7b2cd0dc9912 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -103,7 +103,7 @@ def ismethod(oclass, obj, name): for name, value in dict.items(): if name in ignore: continue - self.assertHasAttr(module, name, ignore) + self.assertHasAttr(module, name) py_item = getattr(module, name) if isinstance(value, pyclbr.Function): self.assertIsInstance(py_item, (FunctionType, BuiltinFunctionType)) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index ac88b3c6f13d5e..281b24eaa36b80 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -1380,7 +1380,7 @@ def test_modules_search_builtin(self): helper('modules garbage') result = help_io.getvalue() - self.assertTrue(result.startswith(expected)) + self.assertStartsWith(result, expected) def test_importfile(self): try: diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 43957f525f10c0..bd76d636e4f0fc 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1415,27 +1415,27 @@ class CommandLineTest(unittest.TestCase): def test_parse_args(self): args, help_text = random._parse_args(shlex.split("--choice a b c")) self.assertEqual(args.choice, ["a", "b", "c"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("--integer 5")) self.assertEqual(args.integer, 5) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("--float 2.5")) self.assertEqual(args.float, 2.5) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("a b c")) self.assertEqual(args.input, ["a", "b", "c"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("5")) self.assertEqual(args.input, ["5"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("2.5")) self.assertEqual(args.input, ["2.5"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") def test_main(self): for command, expected in [ diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f79a6149078996..e9128ac1d9762d 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2868,11 +2868,11 @@ def test_long_pattern(self): pattern = 'Very %spattern' % ('long ' * 1000) r = repr(re.compile(pattern)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") + self.assertStartsWith(r, "re.compile('Very long long lon") r = repr(re.compile(pattern, re.I)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") - self.assertEqual(r[-16:], ", re.IGNORECASE)") + self.assertStartsWith(r, "re.compile('Very long long lon") + self.assertEndsWith(r, ", re.IGNORECASE)") def test_flags_repr(self): self.assertEqual(repr(re.I), "re.IGNORECASE") @@ -2951,7 +2951,7 @@ def test_deprecated_modules(self): self.assertEqual(mod.__name__, name) self.assertEqual(mod.__package__, '') for attr in deprecated[name]: - self.assertTrue(hasattr(mod, attr)) + self.assertHasAttr(mod, attr) del sys.modules[name] @cpython_only diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index ffad35092f9916..16623654c29b28 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -173,13 +173,13 @@ def test_instance(self): eq(r(i3), (""%id(i3))) s = r(ClassWithFailingRepr) - self.assertTrue(s.startswith("")) + self.assertStartsWith(s, "") self.assertIn(s.find("..."), [12, 13]) def test_lambda(self): r = repr(lambda x: x) - self.assertTrue(r.startswith("..') # Methods - self.assertTrue(repr(''.split).startswith( - '", "exec") expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \ "ordinal not in range(128)" - self.assertTrue(c.exception.args[0].startswith(expected), - msg=c.exception.args[0]) + self.assertStartsWith(c.exception.args[0], expected) def test_file_parse_error_multiline(self): # gh96611: diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 06460d6047cac8..2767a53d53c1fc 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -539,9 +539,9 @@ def test_openssl_version(self): openssl_ver = f"OpenSSL {major:d}.{minor:d}.{patch:d}" else: openssl_ver = f"OpenSSL {major:d}.{minor:d}.{fix:d}" - self.assertTrue( - s.startswith((openssl_ver, libressl_ver, "AWS-LC")), - (s, t, hex(n)) + self.assertStartsWith( + s, (openssl_ver, libressl_ver, "AWS-LC"), + (t, hex(n)) ) @support.cpython_only @@ -1668,7 +1668,7 @@ def test_lib_reason(self): regex = "(NO_START_LINE|UNSUPPORTED_PUBLIC_KEY_TYPE)" self.assertRegex(cm.exception.reason, regex) s = str(cm.exception) - self.assertTrue("NO_START_LINE" in s, s) + self.assertIn("NO_START_LINE", s) def test_subclass(self): # Check that the appropriate SSLError subclass is raised @@ -1683,7 +1683,7 @@ def test_subclass(self): with self.assertRaises(ssl.SSLWantReadError) as cm: c.do_handshake() s = str(cm.exception) - self.assertTrue(s.startswith("The operation did not complete (read)"), s) + self.assertStartsWith(s, "The operation did not complete (read)") # For compatibility self.assertEqual(cm.exception.errno, ssl.SSL_ERROR_WANT_READ) diff --git a/Lib/test/test_stat.py b/Lib/test/test_stat.py index 49013a4bcd8af6..5fd25d5012c425 100644 --- a/Lib/test/test_stat.py +++ b/Lib/test/test_stat.py @@ -157,7 +157,7 @@ def test_mode(self): os.chmod(TESTFN, 0o700) st_mode, modestr = self.get_mode() - self.assertEqual(modestr[:3], '-rw') + self.assertStartsWith(modestr, '-rw') self.assertS_IS("REG", st_mode) self.assertEqual(self.statmod.S_IFMT(st_mode), self.statmod.S_IFREG) @@ -256,7 +256,7 @@ def test_flags_consistent(self): "FILE_ATTRIBUTE_* constants are Win32 specific") def test_file_attribute_constants(self): for key, value in sorted(self.file_attributes.items()): - self.assertTrue(hasattr(self.statmod, key), key) + self.assertHasAttr(self.statmod, key) modvalue = getattr(self.statmod, key) self.assertEqual(value, modvalue, key) @@ -314,7 +314,7 @@ def test_macosx_attribute_values(self): self.assertEqual(self.statmod.S_ISGID, 0o002000) self.assertEqual(self.statmod.S_ISVTX, 0o001000) - self.assertFalse(hasattr(self.statmod, "S_ISTXT")) + self.assertNotHasAttr(self.statmod, "S_ISTXT") self.assertEqual(self.statmod.S_IREAD, self.statmod.S_IRUSR) self.assertEqual(self.statmod.S_IWRITE, self.statmod.S_IWUSR) self.assertEqual(self.statmod.S_IEXEC, self.statmod.S_IXUSR) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index c69baa4bf4d1b1..5980f939185965 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -645,7 +645,7 @@ def do_test(self, args): def test_numerictestcase_is_testcase(self): # Ensure that NumericTestCase actually is a TestCase. - self.assertTrue(issubclass(NumericTestCase, unittest.TestCase)) + self.assertIsSubclass(NumericTestCase, unittest.TestCase) def test_error_msg_numeric(self): # Test the error message generated for numeric comparisons. @@ -683,32 +683,23 @@ class GlobalsTest(unittest.TestCase): def test_meta(self): # Test for the existence of metadata. for meta in self.expected_metadata: - self.assertTrue(hasattr(self.module, meta), - "%s not present" % meta) + self.assertHasAttr(self.module, meta) def test_check_all(self): # Check everything in __all__ exists and is public. module = self.module for name in module.__all__: # No private names in __all__: - self.assertFalse(name.startswith("_"), + self.assertNotStartsWith(name, "_", 'private name "%s" in __all__' % name) # And anything in __all__ must exist: - self.assertTrue(hasattr(module, name), - 'missing name "%s" in __all__' % name) + self.assertHasAttr(module, name) class StatisticsErrorTest(unittest.TestCase): def test_has_exception(self): - errmsg = ( - "Expected StatisticsError to be a ValueError, but got a" - " subclass of %r instead." - ) - self.assertTrue(hasattr(statistics, 'StatisticsError')) - self.assertTrue( - issubclass(statistics.StatisticsError, ValueError), - errmsg % statistics.StatisticsError.__base__ - ) + self.assertHasAttr(statistics, 'StatisticsError') + self.assertIsSubclass(statistics.StatisticsError, ValueError) # === Tests for private utility functions === diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index d0bc0bd7b61520..9622151143cd78 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -42,7 +42,7 @@ def test_repr(self): # os.stat() gives a complicated struct sequence. st = os.stat(__file__) rep = repr(st) - self.assertTrue(rep.startswith("os.stat_result")) + self.assertStartsWith(rep, "os.stat_result") self.assertIn("st_mode=", rep) self.assertIn("st_ino=", rep) self.assertIn("st_dev=", rep) @@ -307,7 +307,7 @@ def test_copy_replace_with_invisible_fields(self): self.assertEqual(t5.tm_mon, 2) # named invisible fields - self.assertTrue(hasattr(t, 'tm_zone'), f"{t} has no attribute 'tm_zone'") + self.assertHasAttr(t, 'tm_zone') with self.assertRaisesRegex(AttributeError, 'readonly attribute'): t.tm_zone = 'some other zone' self.assertEqual(t2.tm_zone, t.tm_zone) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index ca35804fb36076..f0e350c71f60ea 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1178,7 +1178,7 @@ def test_universal_newlines_communicate_stdin_stdout_stderr(self): self.assertEqual("line1\nline2\nline3\nline4\nline5\n", stdout) # Python debug build push something like "[42442 refs]\n" # to stderr at exit of subprocess. - self.assertTrue(stderr.startswith("eline2\neline6\neline7\n")) + self.assertStartsWith(stderr, "eline2\neline6\neline7\n") def test_universal_newlines_communicate_encodings(self): # Check that universal newlines mode works for various encodings, @@ -1510,7 +1510,7 @@ def test_issue8780(self): "[sys.executable, '-c', 'print(\"Hello World!\")'])", 'assert retcode == 0')) output = subprocess.check_output([sys.executable, '-c', code]) - self.assertTrue(output.startswith(b'Hello World!'), ascii(output)) + self.assertStartsWith(output, b'Hello World!') def test_handles_closed_on_exception(self): # If CreateProcess exits with an error, ensure the @@ -1835,8 +1835,8 @@ def test_encoding_warning(self): capture_output=True) lines = cp.stderr.splitlines() self.assertEqual(len(lines), 2, lines) - self.assertTrue(lines[0].startswith(b":2: EncodingWarning: ")) - self.assertTrue(lines[1].startswith(b":3: EncodingWarning: ")) + self.assertStartsWith(lines[0], b":2: EncodingWarning: ") + self.assertStartsWith(lines[1], b":3: EncodingWarning: ") def _get_test_grp_name(): diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index 5cef612a340be9..193c8b7d7f3e13 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -547,11 +547,11 @@ def test_special_methods(self): self.assertEqual(s.__reduce__, e.__reduce__) self.assertEqual(s.__reduce_ex__, e.__reduce_ex__) self.assertEqual(s.__getstate__, e.__getstate__) - self.assertFalse(hasattr(s, '__getnewargs__')) - self.assertFalse(hasattr(s, '__getnewargs_ex__')) - self.assertFalse(hasattr(s, '__setstate__')) - self.assertFalse(hasattr(s, '__copy__')) - self.assertFalse(hasattr(s, '__deepcopy__')) + self.assertNotHasAttr(s, '__getnewargs__') + self.assertNotHasAttr(s, '__getnewargs_ex__') + self.assertNotHasAttr(s, '__setstate__') + self.assertNotHasAttr(s, '__copy__') + self.assertNotHasAttr(s, '__deepcopy__') def test_pickling(self): e = E() diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 8446da03e3645b..e48a2464ee5977 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -407,10 +407,10 @@ class Obj: with support.swap_attr(obj, "y", 5) as y: self.assertEqual(obj.y, 5) self.assertIsNone(y) - self.assertFalse(hasattr(obj, 'y')) + self.assertNotHasAttr(obj, 'y') with support.swap_attr(obj, "y", 5): del obj.y - self.assertFalse(hasattr(obj, 'y')) + self.assertNotHasAttr(obj, 'y') def test_swap_item(self): D = {"x":1} diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index fb1c8492a64d38..795d1ecbb59f8f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -57,7 +57,7 @@ def test_original_displayhook(self): dh(None) self.assertEqual(out.getvalue(), "") - self.assertTrue(not hasattr(builtins, "_")) + self.assertNotHasAttr(builtins, "_") # sys.displayhook() requires arguments self.assertRaises(TypeError, dh) @@ -172,7 +172,7 @@ def test_original_excepthook(self): with support.captured_stderr() as err: sys.__excepthook__(*sys.exc_info()) - self.assertTrue(err.getvalue().endswith("ValueError: 42\n")) + self.assertEndsWith(err.getvalue(), "ValueError: 42\n") self.assertRaises(TypeError, sys.__excepthook__) @@ -192,7 +192,7 @@ def test_excepthook_bytes_filename(self): err = err.getvalue() self.assertIn(""" File "b'bytes_filename'", line 123\n""", err) self.assertIn(""" text\n""", err) - self.assertTrue(err.endswith("SyntaxError: msg\n")) + self.assertEndsWith(err, "SyntaxError: msg\n") def test_excepthook(self): with test.support.captured_output("stderr") as stderr: @@ -269,8 +269,7 @@ def check_exit_message(code, expected, **env_vars): rc, out, err = assert_python_failure('-c', code, **env_vars) self.assertEqual(rc, 1) self.assertEqual(out, b'') - self.assertTrue(err.startswith(expected), - "%s doesn't start with %s" % (ascii(err), ascii(expected))) + self.assertStartsWith(err, expected) # test that stderr buffer is flushed before the exit message is written # into stderr @@ -437,7 +436,7 @@ def test_call_tracing(self): @unittest.skipUnless(hasattr(sys, "setdlopenflags"), 'test needs sys.setdlopenflags()') def test_dlopenflags(self): - self.assertTrue(hasattr(sys, "getdlopenflags")) + self.assertHasAttr(sys, "getdlopenflags") self.assertRaises(TypeError, sys.getdlopenflags, 42) oldflags = sys.getdlopenflags() self.assertRaises(TypeError, sys.setdlopenflags) @@ -623,8 +622,7 @@ def g456(): # And the next record must be for g456(). filename, lineno, funcname, sourceline = stack[i+1] self.assertEqual(funcname, "g456") - self.assertTrue((sourceline.startswith("if leave_g.wait(") or - sourceline.startswith("g_raised.set()"))) + self.assertStartsWith(sourceline, ("if leave_g.wait(", "g_raised.set()")) finally: # Reap the spawned thread. leave_g.set() @@ -860,7 +858,7 @@ def test_sys_flags(self): "hash_randomization", "isolated", "dev_mode", "utf8_mode", "warn_default_encoding", "safe_path", "int_max_str_digits") for attr in attrs: - self.assertTrue(hasattr(sys.flags, attr), attr) + self.assertHasAttr(sys.flags, attr) attr_type = bool if attr in ("dev_mode", "safe_path") else int self.assertEqual(type(getattr(sys.flags, attr)), attr_type, attr) self.assertTrue(repr(sys.flags)) @@ -1072,10 +1070,10 @@ def test_implementation(self): levels = {'alpha': 0xA, 'beta': 0xB, 'candidate': 0xC, 'final': 0xF} - self.assertTrue(hasattr(sys.implementation, 'name')) - self.assertTrue(hasattr(sys.implementation, 'version')) - self.assertTrue(hasattr(sys.implementation, 'hexversion')) - self.assertTrue(hasattr(sys.implementation, 'cache_tag')) + self.assertHasAttr(sys.implementation, 'name') + self.assertHasAttr(sys.implementation, 'version') + self.assertHasAttr(sys.implementation, 'hexversion') + self.assertHasAttr(sys.implementation, 'cache_tag') version = sys.implementation.version self.assertEqual(version[:2], (version.major, version.minor)) @@ -1419,7 +1417,7 @@ def __del__(self): else: self.assertIn("ValueError", report) self.assertIn("del is broken", report) - self.assertTrue(report.endswith("\n")) + self.assertEndsWith(report, "\n") def test_original_unraisablehook_exception_qualname(self): # See bpo-41031, bpo-45083. diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 963cf753ce6178..d30f69ded6643a 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -186,7 +186,7 @@ def test_posix_venv_scheme(self): # The include directory on POSIX isn't exactly the same as before, # but it is "within" sysconfig_includedir = sysconfig.get_path('include', scheme='posix_venv', vars=vars) - self.assertTrue(sysconfig_includedir.startswith(incpath + os.sep)) + self.assertStartsWith(sysconfig_includedir, incpath + os.sep) def test_nt_venv_scheme(self): # The following directories were hardcoded in the venv module @@ -569,8 +569,7 @@ def test_linux_ext_suffix(self): expected_suffixes = 'i386-linux-gnu.so', 'x86_64-linux-gnux32.so', 'i386-linux-musl.so' else: # 8 byte pointer size expected_suffixes = 'x86_64-linux-gnu.so', 'x86_64-linux-musl.so' - self.assertTrue(suffix.endswith(expected_suffixes), - f'unexpected suffix {suffix!r}') + self.assertEndsWith(suffix, expected_suffixes) @unittest.skipUnless(sys.platform == 'android', 'Android-specific test') def test_android_ext_suffix(self): @@ -582,13 +581,12 @@ def test_android_ext_suffix(self): "aarch64": "aarch64-linux-android", "armv7l": "arm-linux-androideabi", }[machine] - self.assertTrue(suffix.endswith(f"-{expected_triplet}.so"), - f"{machine=}, {suffix=}") + self.assertEndsWith(suffix, f"-{expected_triplet}.so") @unittest.skipUnless(sys.platform == 'darwin', 'OS X-specific test') def test_osx_ext_suffix(self): suffix = sysconfig.get_config_var('EXT_SUFFIX') - self.assertTrue(suffix.endswith('-darwin.so'), suffix) + self.assertEndsWith(suffix, '-darwin.so') def test_always_set_py_debug(self): self.assertIn('Py_DEBUG', sysconfig.get_config_vars()) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2018a20afd1b18..cf218a2bf14369 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1650,7 +1650,7 @@ def test_cwd(self): try: for t in tar: if t.name != ".": - self.assertTrue(t.name.startswith("./"), t.name) + self.assertStartsWith(t.name, "./") finally: tar.close() diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index d46d3c0f040601..52b13b98cbcce5 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -516,11 +516,11 @@ def test_collision_with_existing_file(self): _mock_candidate_names('aaa', 'aaa', 'bbb'): (fd1, name1) = self.make_temp() os.close(fd1) - self.assertTrue(name1.endswith('aaa')) + self.assertEndsWith(name1, 'aaa') (fd2, name2) = self.make_temp() os.close(fd2) - self.assertTrue(name2.endswith('bbb')) + self.assertEndsWith(name2, 'bbb') def test_collision_with_existing_directory(self): # _mkstemp_inner tries another name when a directory with @@ -528,11 +528,11 @@ def test_collision_with_existing_directory(self): with _inside_empty_temp_dir(), \ _mock_candidate_names('aaa', 'aaa', 'bbb'): dir = tempfile.mkdtemp() - self.assertTrue(dir.endswith('aaa')) + self.assertEndsWith(dir, 'aaa') (fd, name) = self.make_temp() os.close(fd) - self.assertTrue(name.endswith('bbb')) + self.assertEndsWith(name, 'bbb') class TestGetTempPrefix(BaseTestCase): @@ -828,9 +828,9 @@ def test_collision_with_existing_file(self): _mock_candidate_names('aaa', 'aaa', 'bbb'): file = tempfile.NamedTemporaryFile(delete=False) file.close() - self.assertTrue(file.name.endswith('aaa')) + self.assertEndsWith(file.name, 'aaa') dir = tempfile.mkdtemp() - self.assertTrue(dir.endswith('bbb')) + self.assertEndsWith(dir, 'bbb') def test_collision_with_existing_directory(self): # mkdtemp tries another name when a directory with @@ -838,9 +838,9 @@ def test_collision_with_existing_directory(self): with _inside_empty_temp_dir(), \ _mock_candidate_names('aaa', 'aaa', 'bbb'): dir1 = tempfile.mkdtemp() - self.assertTrue(dir1.endswith('aaa')) + self.assertEndsWith(dir1, 'aaa') dir2 = tempfile.mkdtemp() - self.assertTrue(dir2.endswith('bbb')) + self.assertEndsWith(dir2, 'bbb') def test_for_tempdir_is_bytes_issue40701_api_warts(self): orig_tempdir = tempfile.tempdir diff --git a/Lib/test/test_termios.py b/Lib/test/test_termios.py index e5d11cf84d2a66..ce8392a6ccdbd6 100644 --- a/Lib/test/test_termios.py +++ b/Lib/test/test_termios.py @@ -290,8 +290,8 @@ def test_ioctl_constants(self): self.assertGreaterEqual(value, 0) def test_exception(self): - self.assertTrue(issubclass(termios.error, Exception)) - self.assertFalse(issubclass(termios.error, OSError)) + self.assertIsSubclass(termios.error, Exception) + self.assertNotIsSubclass(termios.error, OSError) if __name__ == '__main__': diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index d06f65270efe79..5312faa50774ec 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -761,17 +761,17 @@ def test_localtime_timezone(self): # Get the localtime and examine it for the offset and zone. lt = time.localtime() - self.assertTrue(hasattr(lt, "tm_gmtoff")) - self.assertTrue(hasattr(lt, "tm_zone")) + self.assertHasAttr(lt, "tm_gmtoff") + self.assertHasAttr(lt, "tm_zone") # See if the offset and zone are similar to the module # attributes. if lt.tm_gmtoff is None: - self.assertTrue(not hasattr(time, "timezone")) + self.assertNotHasAttr(time, "timezone") else: self.assertEqual(lt.tm_gmtoff, -[time.timezone, time.altzone][lt.tm_isdst]) if lt.tm_zone is None: - self.assertTrue(not hasattr(time, "tzname")) + self.assertNotHasAttr(time, "tzname") else: self.assertEqual(lt.tm_zone, time.tzname[lt.tm_isdst]) @@ -1184,11 +1184,11 @@ def test_clock_functions(self): if mac_ver >= (10, 12): for name in clock_names: - self.assertTrue(hasattr(time, name), f"time.{name} is not available") + self.assertHasAttr(time, name) else: for name in clock_names: - self.assertFalse(hasattr(time, name), f"time.{name} is available") + self.assertNotHasAttr(time, name) if __name__ == "__main__": diff --git a/Lib/test/test_timeit.py b/Lib/test/test_timeit.py index f5ae0a84eb3506..2aeebea9f93d43 100644 --- a/Lib/test/test_timeit.py +++ b/Lib/test/test_timeit.py @@ -222,8 +222,8 @@ def test_repeat_function_zero_iters(self): def assert_exc_string(self, exc_string, expected_exc_name): exc_lines = exc_string.splitlines() self.assertGreater(len(exc_lines), 2) - self.assertTrue(exc_lines[0].startswith('Traceback')) - self.assertTrue(exc_lines[-1].startswith(expected_exc_name)) + self.assertStartsWith(exc_lines[0], 'Traceback') + self.assertStartsWith(exc_lines[-1], expected_exc_name) def test_print_exc(self): s = io.StringIO() diff --git a/Lib/test/test_tkinter/support.py b/Lib/test/test_tkinter/support.py index ebb9e00ff91bf0..46b01e6f131290 100644 --- a/Lib/test/test_tkinter/support.py +++ b/Lib/test/test_tkinter/support.py @@ -58,7 +58,7 @@ def _test_widget(self, constructor): destroy_default_root() tkinter.NoDefaultRoot() self.assertRaises(RuntimeError, constructor) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') def destroy_default_root(): diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py index 96ea3f0117ca03..0c76e07066f8a8 100644 --- a/Lib/test/test_tkinter/test_misc.py +++ b/Lib/test/test_tkinter/test_misc.py @@ -497,7 +497,7 @@ def test_info_patchlevel(self): self.assertEqual(vi.serial, 0) else: self.assertEqual(vi.micro, 0) - self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}')) + self.assertStartsWith(str(vi), f'{vi.major}.{vi.minor}') def test_embedded_null(self): widget = tkinter.Entry(self.root) @@ -609,7 +609,7 @@ def test_focus(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, '??') self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, '??') self.assertEqual(e.char, '??') @@ -642,7 +642,7 @@ def test_configure(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, '??') self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, '??') self.assertEqual(e.char, '??') @@ -676,7 +676,7 @@ def test_event_generate_key_press(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertIsInstance(e.state, int) self.assertNotEqual(e.state, 0) @@ -747,7 +747,7 @@ def test_event_generate_button_press(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, 1) self.assertEqual(e.state, 0) self.assertEqual(e.char, '??') @@ -781,7 +781,7 @@ def test_event_generate_motion(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, 0x100) self.assertEqual(e.char, '??') @@ -814,7 +814,7 @@ def test_event_generate_mouse_wheel(self): self.assertIs(e.widget, f) self.assertIsInstance(e.serial, int) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.time, 0) self.assertEqual(e.num, '??') self.assertEqual(e.state, 0) @@ -849,7 +849,7 @@ def test_generate_event_virtual_event(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, 0) self.assertEqual(e.char, '??') @@ -1308,17 +1308,17 @@ def test_no_default_root(self): self.assertIs(tkinter._default_root, root) tkinter.NoDefaultRoot() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') # repeated call is no-op tkinter.NoDefaultRoot() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root.destroy() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root = tkinter.Tk() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root.destroy() def test_getboolean(self): diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py index ee8939f62d082c..c40c45594f4d80 100644 --- a/Lib/test/test_type_comments.py +++ b/Lib/test/test_type_comments.py @@ -344,7 +344,7 @@ def test_longargs(self): todo = set(t.name[1:]) self.assertEqual(len(t.args.args) + len(t.args.posonlyargs), len(todo) - bool(t.args.vararg) - bool(t.args.kwarg)) - self.assertTrue(t.name.startswith('f'), t.name) + self.assertStartsWith(t.name, 'f') for index, c in enumerate(t.name[1:]): todo.remove(c) if c == 'v': diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 3552b6b4ef846c..3097c7ddf05901 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -827,15 +827,15 @@ def test_instancecheck_and_subclasscheck(self): self.assertIsInstance(True, x) self.assertIsInstance('a', x) self.assertNotIsInstance(None, x) - self.assertTrue(issubclass(int, x)) - self.assertTrue(issubclass(bool, x)) - self.assertTrue(issubclass(str, x)) - self.assertFalse(issubclass(type(None), x)) + self.assertIsSubclass(int, x) + self.assertIsSubclass(bool, x) + self.assertIsSubclass(str, x) + self.assertNotIsSubclass(type(None), x) for x in (int | None, typing.Union[int, None]): with self.subTest(x=x): self.assertIsInstance(None, x) - self.assertTrue(issubclass(type(None), x)) + self.assertIsSubclass(type(None), x) for x in ( int | collections.abc.Mapping, @@ -844,8 +844,8 @@ def test_instancecheck_and_subclasscheck(self): with self.subTest(x=x): self.assertIsInstance({}, x) self.assertNotIsInstance((), x) - self.assertTrue(issubclass(dict, x)) - self.assertFalse(issubclass(list, x)) + self.assertIsSubclass(dict, x) + self.assertNotIsSubclass(list, x) def test_instancecheck_and_subclasscheck_order(self): T = typing.TypeVar('T') @@ -857,7 +857,7 @@ def test_instancecheck_and_subclasscheck_order(self): for x in will_resolve: with self.subTest(x=x): self.assertIsInstance(1, x) - self.assertTrue(issubclass(int, x)) + self.assertIsSubclass(int, x) wont_resolve = ( T | int, @@ -890,7 +890,7 @@ class BadMeta(type): def __subclasscheck__(cls, sub): 1/0 x = int | BadMeta('A', (), {}) - self.assertTrue(issubclass(int, x)) + self.assertIsSubclass(int, x) self.assertRaises(ZeroDivisionError, issubclass, list, x) def test_or_type_operator_with_TypeVar(self): @@ -1399,7 +1399,7 @@ def test_new_class_basics(self): def test_new_class_subclass(self): C = types.new_class("C", (int,)) - self.assertTrue(issubclass(C, int)) + self.assertIsSubclass(C, int) def test_new_class_meta(self): Meta = self.Meta @@ -1444,7 +1444,7 @@ def func(ns): bases=(int,), kwds=dict(metaclass=Meta, z=2), exec_body=func) - self.assertTrue(issubclass(C, int)) + self.assertIsSubclass(C, int) self.assertIsInstance(C, Meta) self.assertEqual(C.x, 0) self.assertEqual(C.y, 1) diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index a04af55f3fc0ae..d66cab146af246 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -1989,7 +1989,7 @@ def testAssertNoLogsYieldsNone(self): pass self.assertIsNone(value) - def testAssertStartswith(self): + def testAssertStartsWith(self): self.assertStartsWith('ababahalamaha', 'ababa') self.assertStartsWith('ababahalamaha', ('x', 'ababa', 'y')) self.assertStartsWith(UserString('ababahalamaha'), 'ababa') @@ -2034,7 +2034,7 @@ def testAssertStartswith(self): self.assertStartsWith('ababahalamaha', 'amaha', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertNotStartswith(self): + def testAssertNotStartsWith(self): self.assertNotStartsWith('ababahalamaha', 'amaha') self.assertNotStartsWith('ababahalamaha', ('x', 'amaha', 'y')) self.assertNotStartsWith(UserString('ababahalamaha'), 'amaha') @@ -2079,7 +2079,7 @@ def testAssertNotStartswith(self): self.assertNotStartsWith('ababahalamaha', 'ababa', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertEndswith(self): + def testAssertEndsWith(self): self.assertEndsWith('ababahalamaha', 'amaha') self.assertEndsWith('ababahalamaha', ('x', 'amaha', 'y')) self.assertEndsWith(UserString('ababahalamaha'), 'amaha') @@ -2124,7 +2124,7 @@ def testAssertEndswith(self): self.assertEndsWith('ababahalamaha', 'ababa', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertNotEndswith(self): + def testAssertNotEndsWith(self): self.assertNotEndsWith('ababahalamaha', 'ababa') self.assertNotEndsWith('ababahalamaha', ('x', 'ababa', 'y')) self.assertNotEndsWith(UserString('ababahalamaha'), 'ababa') diff --git a/Lib/test/test_userdict.py b/Lib/test/test_userdict.py index ace84ef564df72..75de9ea252de98 100644 --- a/Lib/test/test_userdict.py +++ b/Lib/test/test_userdict.py @@ -166,7 +166,7 @@ def test_update(self): def test_missing(self): # Make sure UserDict doesn't have a __missing__ method - self.assertEqual(hasattr(collections.UserDict, "__missing__"), False) + self.assertNotHasAttr(collections.UserDict, "__missing__") # Test several cases: # (D) subclass defines __missing__ method returning a value # (E) subclass defines __missing__ method raising RuntimeError diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index adc86a49b0668d..12c30e178aeb51 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -774,7 +774,7 @@ def test_activate_shell_script_has_no_dos_newlines(self): with open(script_path, 'rb') as script: for i, line in enumerate(script, 1): error_message = f"CR LF found in line {i}" - self.assertFalse(line.endswith(b'\r\n'), error_message) + self.assertNotEndsWith(line, b'\r\n', error_message) @requireVenvCreate def test_scm_ignore_files_git(self): @@ -978,7 +978,7 @@ def do_test_with_pip(self, system_site_packages): self.assertEqual(err, "") out = out.decode("latin-1") # Force to text, prevent decoding errors expected_version = "pip {}".format(ensurepip.version()) - self.assertEqual(out[:len(expected_version)], expected_version) + self.assertStartsWith(out, expected_version) env_dir = os.fsencode(self.env_dir).decode("latin-1") self.assertIn(env_dir, out) diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py index 05710c469348c4..5c3b1250ceb045 100644 --- a/Lib/test/test_warnings/__init__.py +++ b/Lib/test/test_warnings/__init__.py @@ -102,7 +102,7 @@ class PublicAPITests(BaseTest): """ def test_module_all_attribute(self): - self.assertTrue(hasattr(self.module, '__all__')) + self.assertHasAttr(self.module, '__all__') target_api = ["warn", "warn_explicit", "showwarning", "formatwarning", "filterwarnings", "simplefilter", "resetwarnings", "catch_warnings", "deprecated"] @@ -735,7 +735,7 @@ class CWarnTests(WarnTests, unittest.TestCase): # test.import_helper.import_fresh_module utility function def test_accelerated(self): self.assertIsNot(original_warnings, self.module) - self.assertFalse(hasattr(self.module.warn, '__code__')) + self.assertNotHasAttr(self.module.warn, '__code__') class PyWarnTests(WarnTests, unittest.TestCase): module = py_warnings @@ -744,7 +744,7 @@ class PyWarnTests(WarnTests, unittest.TestCase): # test.import_helper.import_fresh_module utility function def test_pure_python(self): self.assertIsNot(original_warnings, self.module) - self.assertTrue(hasattr(self.module.warn, '__code__')) + self.assertHasAttr(self.module.warn, '__code__') class WCmdLineTests(BaseTest): @@ -1528,12 +1528,12 @@ def test_late_resource_warning(self): # (_warnings will try to import it) code = "f = open(%a)" % __file__ rc, out, err = assert_python_ok("-Wd", "-c", code) - self.assertTrue(err.startswith(expected), ascii(err)) + self.assertStartsWith(err, expected) # import the warnings module code = "import warnings; f = open(%a)" % __file__ rc, out, err = assert_python_ok("-Wd", "-c", code) - self.assertTrue(err.startswith(expected), ascii(err)) + self.assertStartsWith(err, expected) class AsyncTests(BaseTest): diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 4faad6629fe23c..4c7c900eb56ae1 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -432,7 +432,7 @@ def check_proxy(self, o, proxy): self.assertEqual(proxy.foo, 2, "proxy does not reflect attribute modification") del o.foo - self.assertFalse(hasattr(proxy, 'foo'), + self.assertNotHasAttr(proxy, 'foo', "proxy does not reflect attribute removal") proxy.foo = 1 @@ -442,7 +442,7 @@ def check_proxy(self, o, proxy): self.assertEqual(o.foo, 2, "object does not reflect attribute modification via proxy") del proxy.foo - self.assertFalse(hasattr(o, 'foo'), + self.assertNotHasAttr(o, 'foo', "object does not reflect attribute removal via proxy") def test_proxy_deletion(self): @@ -1108,7 +1108,7 @@ def meth(self): self.assertEqual(r.slot1, "abc") self.assertEqual(r.slot2, "def") self.assertEqual(r.meth(), "abcdef") - self.assertFalse(hasattr(r, "__dict__")) + self.assertNotHasAttr(r, "__dict__") def test_subclass_refs_with_cycle(self): """Confirm https://bugs.python.org/issue3100 is fixed.""" diff --git a/Lib/test/test_weakset.py b/Lib/test/test_weakset.py index 76e8e5c8ab7d3c..c1e4f9c8366e58 100644 --- a/Lib/test/test_weakset.py +++ b/Lib/test/test_weakset.py @@ -466,7 +466,7 @@ def test_copying(self): self.assertIsNot(dup, s) self.assertIs(dup.x, s.x) self.assertIs(dup.z, s.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') dup = copy.deepcopy(s) self.assertIsInstance(dup, cls) @@ -476,7 +476,7 @@ def test_copying(self): self.assertIsNot(dup.x, s.x) self.assertEqual(dup.z, s.z) self.assertIsNot(dup.z, s.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') if __name__ == "__main__": diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index d9076e77c158a2..1bae884ed9ae3e 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -17,9 +17,9 @@ class WindowsConsoleIOTests(unittest.TestCase): def test_abc(self): - self.assertTrue(issubclass(ConIO, io.RawIOBase)) - self.assertFalse(issubclass(ConIO, io.BufferedIOBase)) - self.assertFalse(issubclass(ConIO, io.TextIOBase)) + self.assertIsSubclass(ConIO, io.RawIOBase) + self.assertNotIsSubclass(ConIO, io.BufferedIOBase) + self.assertNotIsSubclass(ConIO, io.TextIOBase) def test_open_fd(self): self.assertRaisesRegex(ValueError, diff --git a/Lib/test/test_with.py b/Lib/test/test_with.py index fd7abd1782ec4d..f16611b29a2658 100644 --- a/Lib/test/test_with.py +++ b/Lib/test/test_with.py @@ -679,7 +679,7 @@ def testSingleComplexTarget(self): class C: pass blah = C() with mock_contextmanager_generator() as blah.foo: - self.assertEqual(hasattr(blah, "foo"), True) + self.assertHasAttr(blah, "foo") def testMultipleComplexTargets(self): class C: diff --git a/Lib/test/test_wmi.py b/Lib/test/test_wmi.py index ac7c9cb3a5a493..90eb40439d4b4a 100644 --- a/Lib/test/test_wmi.py +++ b/Lib/test/test_wmi.py @@ -70,8 +70,8 @@ def test_wmi_query_overflow(self): def test_wmi_query_multiple_rows(self): # Multiple instances should have an extra null separator r = wmi_exec_query("SELECT ProcessId FROM Win32_Process WHERE ProcessId < 1000") - self.assertFalse(r.startswith("\0"), r) - self.assertFalse(r.endswith("\0"), r) + self.assertNotStartsWith(r, "\0") + self.assertNotEndsWith(r, "\0") it = iter(r.split("\0")) try: while True: diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index b047f7b06f85d3..e04a4d2c2218a3 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -149,9 +149,9 @@ def bad_app(environ,start_response): start_response("200 OK", ('Content-Type','text/plain')) return ["Hello, world!"] out, err = run_amock(validator(bad_app)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual( err.splitlines()[-2], "AssertionError: Headers (('Content-Type', 'text/plain')) must" @@ -174,9 +174,9 @@ def bad_app(environ, start_response): for status, exc_message in tests: with self.subTest(status=status): out, err = run_amock(create_bad_app(status)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual(err.splitlines()[-2], exc_message) def test_wsgi_input(self): @@ -185,9 +185,9 @@ def bad_app(e,s): s("200 OK", [("Content-Type", "text/plain; charset=utf-8")]) return [b"data"] out, err = run_amock(validator(bad_app)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual( err.splitlines()[-2], "AssertionError" ) @@ -200,7 +200,7 @@ def app(e, s): ]) return [b"data"] out, err = run_amock(validator(app)) - self.assertTrue(err.endswith('"GET / HTTP/1.0" 200 4\n')) + self.assertEndsWith(err, '"GET / HTTP/1.0" 200 4\n') ver = sys.version.split()[0].encode('ascii') py = python_implementation().encode('ascii') pyver = py + b"/" + ver diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 8f2779520070d2..38be2cd437f200 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -225,8 +225,7 @@ def check_element(element): self.assertTrue(ET.iselement(element), msg="not an element") direlem = dir(element) for attr in 'tag', 'attrib', 'text', 'tail': - self.assertTrue(hasattr(element, attr), - msg='no %s member' % attr) + self.assertHasAttr(element, attr) self.assertIn(attr, direlem, msg='no %s visible by dir' % attr) @@ -251,7 +250,7 @@ def check_element(element): # Make sure all standard element methods exist. def check_method(method): - self.assertTrue(hasattr(method, '__call__'), + self.assertHasAttr(method, '__call__', msg="%s not callable" % method) check_method(element.append) diff --git a/Lib/test/test_xxlimited.py b/Lib/test/test_xxlimited.py index 6dbfb3f439393c..b52e78bc4fb7e0 100644 --- a/Lib/test/test_xxlimited.py +++ b/Lib/test/test_xxlimited.py @@ -31,7 +31,7 @@ def test_foo(self): self.assertEqual(self.module.foo(1, 2), 3) def test_str(self): - self.assertTrue(issubclass(self.module.Str, str)) + self.assertIsSubclass(self.module.Str, str) self.assertIsNot(self.module.Str, str) custom_string = self.module.Str("abcd") diff --git a/Lib/test/test_zipapp.py b/Lib/test/test_zipapp.py index d4766c59a102db..8fb0a68deba535 100644 --- a/Lib/test/test_zipapp.py +++ b/Lib/test/test_zipapp.py @@ -259,7 +259,7 @@ def test_pack_to_fileobj(self): (source / '__main__.py').touch() target = io.BytesIO() zipapp.create_archive(str(source), target, interpreter='python') - self.assertTrue(target.getvalue().startswith(b'#!python\n')) + self.assertStartsWith(target.getvalue(), b'#!python\n') def test_read_shebang(self): # Test that we can read the shebang line correctly. @@ -300,7 +300,7 @@ def test_write_shebang_to_fileobj(self): zipapp.create_archive(str(source), str(target), interpreter='python') new_target = io.BytesIO() zipapp.create_archive(str(target), new_target, interpreter='python2.7') - self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + self.assertStartsWith(new_target.getvalue(), b'#!python2.7\n') def test_read_from_pathlike_obj(self): # Test that we can copy an archive using a path-like object @@ -326,7 +326,7 @@ def test_read_from_fileobj(self): new_target = io.BytesIO() temp_archive.seek(0) zipapp.create_archive(temp_archive, new_target, interpreter='python2.7') - self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + self.assertStartsWith(new_target.getvalue(), b'#!python2.7\n') def test_remove_shebang(self): # Test that we can remove the shebang from a file. diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index e93603998f979e..ada96813709aea 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3198,7 +3198,7 @@ def test_write_dir(self): with zipfile.ZipFile(TESTFN, "w") as zipf: zipf.write(dirpath) zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("/x/")) + self.assertEndsWith(zinfo.filename, "/x/") self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) zipf.write(dirpath, "y") zinfo = zipf.filelist[1] @@ -3206,7 +3206,7 @@ def test_write_dir(self): self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) with zipfile.ZipFile(TESTFN, "r") as zipf: zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("/x/")) + self.assertEndsWith(zinfo.filename, "/x/") self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) zinfo = zipf.filelist[1] self.assertTrue(zinfo.filename, "y/") @@ -3226,7 +3226,7 @@ def test_writestr_dir(self): self.assertEqual(zinfo.external_attr, (0o40775 << 16) | 0x10) with zipfile.ZipFile(TESTFN, "r") as zipf: zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("x/")) + self.assertEndsWith(zinfo.filename, "x/") self.assertEqual(zinfo.external_attr, (0o40775 << 16) | 0x10) target = os.path.join(TESTFN2, "target") os.mkdir(target) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 1f288c8b45d589..b5b4acf5f850be 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -835,11 +835,11 @@ def doTraceback(self, module): s = io.StringIO() print_tb(tb, 1, s) - self.assertTrue(s.getvalue().endswith( + self.assertEndsWith(s.getvalue(), ' def do_raise(): raise TypeError\n' '' if support.has_no_debug_ranges() else ' ^^^^^^^^^^^^^^^\n' - )) + ) else: raise AssertionError("This ought to be impossible") diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index d2845495c7f8b6..f313e394f49e9b 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1915,8 +1915,8 @@ class ExtensionBuiltTest(unittest.TestCase): def test_cache_location(self): # The pure Python version stores caches on attributes, but the C # extension stores them in C globals (at least for now) - self.assertFalse(hasattr(c_zoneinfo.ZoneInfo, "_weak_cache")) - self.assertTrue(hasattr(py_zoneinfo.ZoneInfo, "_weak_cache")) + self.assertNotHasAttr(c_zoneinfo.ZoneInfo, "_weak_cache") + self.assertHasAttr(py_zoneinfo.ZoneInfo, "_weak_cache") def test_gc_tracked(self): import gc From 458e33018a2f4f4b3d9a2c8f6e70dcce31f34005 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 22 May 2025 22:22:02 +1200 Subject: [PATCH 289/989] gh-127081: lock non-re-entrant `*pwent` calls in free-threading (#132748) --- ...-04-21-00-58-04.gh-issue-127081.3DCl92.rst | 2 ++ Modules/pwdmodule.c | 25 +++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-21-00-58-04.gh-issue-127081.3DCl92.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-21-00-58-04.gh-issue-127081.3DCl92.rst b/Misc/NEWS.d/next/Library/2025-04-21-00-58-04.gh-issue-127081.3DCl92.rst new file mode 100644 index 00000000000000..a99669a1bc021a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-21-00-58-04.gh-issue-127081.3DCl92.rst @@ -0,0 +1,2 @@ +Fix libc thread safety issues with :mod:`pwd` by locking access to +``getpwall``. diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c index 2240e2078b2d98..c5a8cead19a773 100644 --- a/Modules/pwdmodule.c +++ b/Modules/pwdmodule.c @@ -301,18 +301,33 @@ pwd_getpwall_impl(PyObject *module) struct passwd *p; if ((d = PyList_New(0)) == NULL) return NULL; + +#ifdef Py_GIL_DISABLED + static PyMutex getpwall_mutex = {0}; + PyMutex_Lock(&getpwall_mutex); +#endif + int failure = 0; + PyObject *v = NULL; setpwent(); while ((p = getpwent()) != NULL) { - PyObject *v = mkpwent(module, p); + v = mkpwent(module, p); if (v == NULL || PyList_Append(d, v) != 0) { - Py_XDECREF(v); - Py_DECREF(d); - endpwent(); - return NULL; + /* NOTE: cannot dec-ref here, while holding the mutex. */ + failure = 1; + goto done; } Py_DECREF(v); } + +done: endpwent(); +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&getpwall_mutex); +#endif + if (failure) { + Py_XDECREF(v); + Py_CLEAR(d); + } return d; } #endif From d706eb9e0f99924b628da4a8afe8e23cff8b801b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Thu, 22 May 2025 13:28:35 +0200 Subject: [PATCH 290/989] gh-134455: Fix `build-details.json` to use the ``c_api.headers`` key (#134456) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix `build-details.json` generation to use the correct `c_api.headers` key as defined in PEP 739, instead of `c_api.include`. Co-authored-by: Filipe Laíns 🇵🇸 --- .../next/Build/2025-05-21-19-46-28.gh-issue-134455.vdwlrq.rst | 2 ++ Tools/build/generate-build-details.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-21-19-46-28.gh-issue-134455.vdwlrq.rst diff --git a/Misc/NEWS.d/next/Build/2025-05-21-19-46-28.gh-issue-134455.vdwlrq.rst b/Misc/NEWS.d/next/Build/2025-05-21-19-46-28.gh-issue-134455.vdwlrq.rst new file mode 100644 index 00000000000000..08833b3344f20b --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-21-19-46-28.gh-issue-134455.vdwlrq.rst @@ -0,0 +1,2 @@ +Fixed ``build-details.json`` generation to use the correct ``c_api.headers`` +as defined in :pep:`739`, instead of ``c_api.include``. diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py index 5dc100b8b05e5e..87e262065ec87b 100644 --- a/Tools/build/generate-build-details.py +++ b/Tools/build/generate-build-details.py @@ -123,7 +123,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: if has_static_library: data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY) - data['c_api']['include'] = INCLUDEDIR + data['c_api']['headers'] = INCLUDEDIR if LIBPC: data['c_api']['pkgconfig_path'] = LIBPC From d0eedfa10e5f8a4f34a899d2e744058ef4c66c1a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 May 2025 06:50:06 -0600 Subject: [PATCH 291/989] gh-132775: Use _PyObject_GetXIData (With Fallback) (gh-134440) This change includes some semi-related refactoring of queues and channels. --- Lib/concurrent/futures/interpreter.py | 55 ++---- Lib/test/support/interpreters/channels.py | 85 +++++---- Lib/test/support/interpreters/queues.py | 144 +++++++-------- Lib/test/test__interpchannels.py | 6 +- Lib/test/test_interpreters/test_channels.py | 40 ++--- Lib/test/test_interpreters/test_queues.py | 188 +++++++------------- Modules/_interpchannelsmodule.c | 176 +++++++++++------- Modules/_interpqueuesmodule.c | 175 ++++++++++-------- Modules/_interpreters_common.h | 54 ++++++ Python/crossinterp.c | 1 + 10 files changed, 486 insertions(+), 438 deletions(-) diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index d17688dc9d7346..a2c4fbfd3fb831 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -36,9 +36,6 @@ def __str__(self): """.strip()) -UNBOUND = 2 # error; this should not happen. - - class WorkerContext(_thread.WorkerContext): @classmethod @@ -47,23 +44,13 @@ def resolve_task(fn, args, kwargs): if isinstance(fn, str): # XXX Circle back to this later. raise TypeError('scripts not supported') - if args or kwargs: - raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') - data = textwrap.dedent(fn) - kind = 'script' - # Make sure the script compiles. - # Ideally we wouldn't throw away the resulting code - # object. However, there isn't much to be done until - # code objects are shareable and/or we do a better job - # of supporting code objects in _interpreters.exec(). - compile(data, '', 'exec') else: # Functions defined in the __main__ module can't be pickled, # so they can't be used here. In the future, we could possibly # borrow from multiprocessing to work around this. - data = pickle.dumps((fn, args, kwargs)) - kind = 'function' - return (data, kind) + task = (fn, args, kwargs) + data = pickle.dumps(task) + return data if initializer is not None: try: @@ -86,24 +73,20 @@ def _capture_exc(cls, resultsid): except BaseException as exc: # Send the captured exception out on the results queue, # but still leave it unhandled for the interpreter to handle. - err = pickle.dumps(exc) - _interpqueues.put(resultsid, (None, err), 1, UNBOUND) + _interpqueues.put(resultsid, (None, exc)) raise # re-raise @classmethod def _send_script_result(cls, resultsid): - _interpqueues.put(resultsid, (None, None), 0, UNBOUND) + _interpqueues.put(resultsid, (None, None)) @classmethod def _call(cls, func, args, kwargs, resultsid): with cls._capture_exc(resultsid): res = func(*args or (), **kwargs or {}) # Send the result back. - try: - _interpqueues.put(resultsid, (res, None), 0, UNBOUND) - except _interpreters.NotShareableError: - res = pickle.dumps(res) - _interpqueues.put(resultsid, (res, None), 1, UNBOUND) + with cls._capture_exc(resultsid): + _interpqueues.put(resultsid, (res, None)) @classmethod def _call_pickled(cls, pickled, resultsid): @@ -134,8 +117,7 @@ def initialize(self): _interpreters.incref(self.interpid) maxsize = 0 - fmt = 0 - self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) + self.resultsid = _interpqueues.create(maxsize) self._exec(f'from {__name__} import WorkerContext') @@ -166,17 +148,8 @@ def finalize(self): pass def run(self, task): - data, kind = task - if kind == 'script': - raise NotImplementedError('script kind disabled') - script = f""" -with WorkerContext._capture_exc({self.resultsid}): -{textwrap.indent(data, ' ')} -WorkerContext._send_script_result({self.resultsid})""" - elif kind == 'function': - script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' - else: - raise NotImplementedError(kind) + data = task + script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' try: self._exec(script) @@ -199,15 +172,13 @@ def run(self, task): continue else: break - (res, excdata), pickled, unboundop = obj + (res, exc), unboundop = obj assert unboundop is None, unboundop - if excdata is not None: + if exc is not None: assert res is None, res - assert pickled assert exc_wrapper is not None - exc = pickle.loads(excdata) raise exc from exc_wrapper - return pickle.loads(res) if pickled else res + return res class BrokenInterpreterPool(_thread.BrokenThreadPool): diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/interpreters/channels.py index d2bd93d77f7169..7a2bd7d63f808f 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/interpreters/channels.py @@ -55,15 +55,23 @@ def create(*, unbounditems=UNBOUND): """ unbound = _serialize_unbound(unbounditems) unboundop, = unbound - cid = _channels.create(unboundop) - recv, send = RecvChannel(cid), SendChannel(cid, _unbound=unbound) + cid = _channels.create(unboundop, -1) + recv, send = RecvChannel(cid), SendChannel(cid) + send._set_unbound(unboundop, unbounditems) return recv, send def list_all(): """Return a list of (recv, send) for all open channels.""" - return [(RecvChannel(cid), SendChannel(cid, _unbound=unbound)) - for cid, unbound in _channels.list_all()] + channels = [] + for cid, unboundop, _ in _channels.list_all(): + chan = _, send = RecvChannel(cid), SendChannel(cid) + if not hasattr(send, '_unboundop'): + send._set_unbound(unboundop) + else: + assert send._unbound[0] == op + channels.append(chan) + return channels class _ChannelEnd: @@ -175,16 +183,33 @@ class SendChannel(_ChannelEnd): _end = 'send' - def __new__(cls, cid, *, _unbound=None): - if _unbound is None: - try: - op = _channels.get_channel_defaults(cid) - _unbound = (op,) - except ChannelNotFoundError: - _unbound = _serialize_unbound(UNBOUND) - self = super().__new__(cls, cid) - self._unbound = _unbound - return self +# def __new__(cls, cid, *, _unbound=None): +# if _unbound is None: +# try: +# op = _channels.get_channel_defaults(cid) +# _unbound = (op,) +# except ChannelNotFoundError: +# _unbound = _serialize_unbound(UNBOUND) +# self = super().__new__(cls, cid) +# self._unbound = _unbound +# return self + + def _set_unbound(self, op, items=None): + assert not hasattr(self, '_unbound') + if items is None: + items = _resolve_unbound(op) + unbound = (op, items) + self._unbound = unbound + return unbound + + @property + def unbounditems(self): + try: + _, items = self._unbound + except AttributeError: + op, _ = _channels.get_queue_defaults(self._id) + _, items = self._set_unbound(op) + return items @property def is_closed(self): @@ -192,61 +217,61 @@ def is_closed(self): return info.closed or info.closing def send(self, obj, timeout=None, *, - unbound=None, + unbounditems=None, ): """Send the object (i.e. its data) to the channel's receiving end. This blocks until the object is received. """ - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) _channels.send(self._id, obj, unboundop, timeout=timeout, blocking=True) def send_nowait(self, obj, *, - unbound=None, + unbounditems=None, ): """Send the object to the channel's receiving end. If the object is immediately received then return True (else False). Otherwise this is the same as send(). """ - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) # XXX Note that at the moment channel_send() only ever returns # None. This should be fixed when channel_send_wait() is added. # See bpo-32604 and gh-19829. return _channels.send(self._id, obj, unboundop, blocking=False) def send_buffer(self, obj, timeout=None, *, - unbound=None, + unbounditems=None, ): """Send the object's buffer to the channel's receiving end. This blocks until the object is received. """ - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) _channels.send_buffer(self._id, obj, unboundop, timeout=timeout, blocking=True) def send_buffer_nowait(self, obj, *, - unbound=None, + unbounditems=None, ): """Send the object's buffer to the channel's receiving end. If the object is immediately received then return True (else False). Otherwise this is the same as send(). """ - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) return _channels.send_buffer(self._id, obj, unboundop, blocking=False) def close(self): diff --git a/Lib/test/support/interpreters/queues.py b/Lib/test/support/interpreters/queues.py index deb8e8613af731..d6a3197d9e0e26 100644 --- a/Lib/test/support/interpreters/queues.py +++ b/Lib/test/support/interpreters/queues.py @@ -63,29 +63,34 @@ def _resolve_unbound(flag): return resolved -def create(maxsize=0, *, syncobj=False, unbounditems=UNBOUND): +def create(maxsize=0, *, unbounditems=UNBOUND): """Return a new cross-interpreter queue. The queue may be used to pass data safely between interpreters. - "syncobj" sets the default for Queue.put() - and Queue.put_nowait(). - - "unbounditems" likewise sets the default. See Queue.put() for + "unbounditems" sets the default for Queue.put(); see that method for supported values. The default value is UNBOUND, which replaces the unbound item. """ - fmt = _SHARED_ONLY if syncobj else _PICKLED unbound = _serialize_unbound(unbounditems) unboundop, = unbound - qid = _queues.create(maxsize, fmt, unboundop) - return Queue(qid, _fmt=fmt, _unbound=unbound) + qid = _queues.create(maxsize, unboundop, -1) + self = Queue(qid) + self._set_unbound(unboundop, unbounditems) + return self def list_all(): """Return a list of all open queues.""" - return [Queue(qid, _fmt=fmt, _unbound=(unboundop,)) - for qid, fmt, unboundop in _queues.list_all()] + queues = [] + for qid, unboundop, _ in _queues.list_all(): + self = Queue(qid) + if not hasattr(self, '_unbound'): + self._set_unbound(unboundop) + else: + assert self._unbound[0] == unboundop + queues.append(self) + return queues _known_queues = weakref.WeakValueDictionary() @@ -93,28 +98,17 @@ def list_all(): class Queue: """A cross-interpreter queue.""" - def __new__(cls, id, /, *, _fmt=None, _unbound=None): + def __new__(cls, id, /): # There is only one instance for any given ID. if isinstance(id, int): id = int(id) else: raise TypeError(f'id must be an int, got {id!r}') - if _fmt is None: - if _unbound is None: - _fmt, op = _queues.get_queue_defaults(id) - _unbound = (op,) - else: - _fmt, _ = _queues.get_queue_defaults(id) - elif _unbound is None: - _, op = _queues.get_queue_defaults(id) - _unbound = (op,) try: self = _known_queues[id] except KeyError: self = super().__new__(cls) self._id = id - self._fmt = _fmt - self._unbound = _unbound _known_queues[id] = self _queues.bind(id) return self @@ -143,10 +137,27 @@ def __getnewargs__(self): def __getstate__(self): return None + def _set_unbound(self, op, items=None): + assert not hasattr(self, '_unbound') + if items is None: + items = _resolve_unbound(op) + unbound = (op, items) + self._unbound = unbound + return unbound + @property def id(self): return self._id + @property + def unbounditems(self): + try: + _, items = self._unbound + except AttributeError: + op, _ = _queues.get_queue_defaults(self._id) + _, items = self._set_unbound(op) + return items + @property def maxsize(self): try: @@ -165,77 +176,56 @@ def qsize(self): return _queues.get_count(self._id) def put(self, obj, timeout=None, *, - syncobj=None, - unbound=None, + unbounditems=None, _delay=10 / 1000, # 10 milliseconds ): """Add the object to the queue. This blocks while the queue is full. - If "syncobj" is None (the default) then it uses the - queue's default, set with create_queue(). - - If "syncobj" is false then all objects are supported, - at the expense of worse performance. - - If "syncobj" is true then the object must be "shareable". - Examples of "shareable" objects include the builtin singletons, - str, and memoryview. One benefit is that such objects are - passed through the queue efficiently. - - The key difference, though, is conceptual: the corresponding - object returned from Queue.get() will be strictly equivalent - to the given obj. In other words, the two objects will be - effectively indistinguishable from each other, even if the - object is mutable. The received object may actually be the - same object, or a copy (immutable values only), or a proxy. - Regardless, the received object should be treated as though - the original has been shared directly, whether or not it - actually is. That's a slightly different and stronger promise - than just (initial) equality, which is all "syncobj=False" - can promise. - - "unbound" controls the behavior of Queue.get() for the given + For most objects, the object received through Queue.get() will + be a new one, equivalent to the original and not sharing any + actual underlying data. The notable exceptions include + cross-interpreter types (like Queue) and memoryview, where the + underlying data is actually shared. Furthermore, some types + can be sent through a queue more efficiently than others. This + group includes various immutable types like int, str, bytes, and + tuple (if the items are likewise efficiently shareable). See interpreters.is_shareable(). + + "unbounditems" controls the behavior of Queue.get() for the given object if the current interpreter (calling put()) is later destroyed. - If "unbound" is None (the default) then it uses the + If "unbounditems" is None (the default) then it uses the queue's default, set with create_queue(), which is usually UNBOUND. - If "unbound" is UNBOUND_ERROR then get() will raise an + If "unbounditems" is UNBOUND_ERROR then get() will raise an ItemInterpreterDestroyed exception if the original interpreter has been destroyed. This does not otherwise affect the queue; the next call to put() will work like normal, returning the next item in the queue. - If "unbound" is UNBOUND_REMOVE then the item will be removed + If "unbounditems" is UNBOUND_REMOVE then the item will be removed from the queue as soon as the original interpreter is destroyed. Be aware that this will introduce an imbalance between put() and get() calls. - If "unbound" is UNBOUND then it is returned by get() in place + If "unbounditems" is UNBOUND then it is returned by get() in place of the unbound item. """ - if syncobj is None: - fmt = self._fmt - else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) if timeout is not None: timeout = int(timeout) if timeout < 0: raise ValueError(f'timeout value must be non-negative') end = time.time() + timeout - if fmt is _PICKLED: - obj = pickle.dumps(obj) while True: try: - _queues.put(self._id, obj, fmt, unboundop) + _queues.put(self._id, obj, unboundop) except QueueFull as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -243,18 +233,12 @@ def put(self, obj, timeout=None, *, else: break - def put_nowait(self, obj, *, syncobj=None, unbound=None): - if syncobj is None: - fmt = self._fmt + def put_nowait(self, obj, *, unbounditems=None): + if unbounditems is None: + unboundop = -1 else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound - else: - unboundop, = _serialize_unbound(unbound) - if fmt is _PICKLED: - obj = pickle.dumps(obj) - _queues.put(self._id, obj, fmt, unboundop) + unboundop, = _serialize_unbound(unbounditems) + _queues.put(self._id, obj, unboundop) def get(self, timeout=None, *, _delay=10 / 1000, # 10 milliseconds @@ -265,7 +249,7 @@ def get(self, timeout=None, *, If the next item's original interpreter has been destroyed then the "next object" is determined by the value of the - "unbound" argument to put(). + "unbounditems" argument to put(). """ if timeout is not None: timeout = int(timeout) @@ -274,7 +258,7 @@ def get(self, timeout=None, *, end = time.time() + timeout while True: try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -284,10 +268,6 @@ def get(self, timeout=None, *, if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj def get_nowait(self): @@ -297,16 +277,12 @@ def get_nowait(self): is the same as get(). """ try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: raise # re-raise if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj diff --git a/Lib/test/test__interpchannels.py b/Lib/test/test__interpchannels.py index e4c1ad854514ed..88eee03a3de93a 100644 --- a/Lib/test/test__interpchannels.py +++ b/Lib/test/test__interpchannels.py @@ -247,7 +247,7 @@ def _run_action(cid, action, end, state): def clean_up_channels(): - for cid, _ in _channels.list_all(): + for cid, _, _ in _channels.list_all(): try: _channels.destroy(cid) except _channels.ChannelNotFoundError: @@ -373,11 +373,11 @@ def test_create_cid(self): self.assertIsInstance(cid, _channels.ChannelID) def test_sequential_ids(self): - before = [cid for cid, _ in _channels.list_all()] + before = [cid for cid, _, _ in _channels.list_all()] id1 = _channels.create(REPLACE) id2 = _channels.create(REPLACE) id3 = _channels.create(REPLACE) - after = [cid for cid, _ in _channels.list_all()] + after = [cid for cid, _, _ in _channels.list_all()] self.assertEqual(id2, int(id1) + 1) self.assertEqual(id3, int(id2) + 1) diff --git a/Lib/test/test_interpreters/test_channels.py b/Lib/test/test_interpreters/test_channels.py index eada18f99d04db..0c027b17cea68c 100644 --- a/Lib/test/test_interpreters/test_channels.py +++ b/Lib/test/test_interpreters/test_channels.py @@ -377,11 +377,11 @@ def common(rch, sch, unbound=None, presize=0): if not unbound: extraargs = '' elif unbound is channels.UNBOUND: - extraargs = ', unbound=channels.UNBOUND' + extraargs = ', unbounditems=channels.UNBOUND' elif unbound is channels.UNBOUND_ERROR: - extraargs = ', unbound=channels.UNBOUND_ERROR' + extraargs = ', unbounditems=channels.UNBOUND_ERROR' elif unbound is channels.UNBOUND_REMOVE: - extraargs = ', unbound=channels.UNBOUND_REMOVE' + extraargs = ', unbounditems=channels.UNBOUND_REMOVE' else: raise NotImplementedError(repr(unbound)) interp = interpreters.create() @@ -454,11 +454,11 @@ def common(rch, sch, unbound=None, presize=0): with self.assertRaises(channels.ChannelEmptyError): rch.recv_nowait() - sch.send_nowait(b'ham', unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(b'ham', unbounditems=channels.UNBOUND_REMOVE) self.assertEqual(_channels.get_count(rch.id), 1) interp = common(rch, sch, channels.UNBOUND_REMOVE, 1) self.assertEqual(_channels.get_count(rch.id), 3) - sch.send_nowait(42, unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(42, unbounditems=channels.UNBOUND_REMOVE) self.assertEqual(_channels.get_count(rch.id), 4) del interp self.assertEqual(_channels.get_count(rch.id), 2) @@ -484,11 +484,11 @@ def test_send_cleared_with_subinterpreter_mixed(self): _run_output(interp, dedent(f""" from test.support.interpreters import channels sch = channels.SendChannel({sch.id}) - sch.send_nowait(1, unbound=channels.UNBOUND) - sch.send_nowait(2, unbound=channels.UNBOUND_ERROR) + sch.send_nowait(1, unbounditems=channels.UNBOUND) + sch.send_nowait(2, unbounditems=channels.UNBOUND_ERROR) sch.send_nowait(3) - sch.send_nowait(4, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(5, unbound=channels.UNBOUND) + sch.send_nowait(4, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(5, unbounditems=channels.UNBOUND) """)) self.assertEqual(_channels.get_count(rch.id), 5) @@ -522,8 +522,8 @@ def test_send_cleared_with_subinterpreter_multiple(self): rch = channels.RecvChannel({rch.id}) sch = channels.SendChannel({sch.id}) obj1 = rch.recv() - sch.send_nowait(2, unbound=channels.UNBOUND) - sch.send_nowait(obj1, unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(2, unbounditems=channels.UNBOUND) + sch.send_nowait(obj1, unbounditems=channels.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" from test.support.interpreters import channels @@ -535,21 +535,21 @@ def test_send_cleared_with_subinterpreter_multiple(self): self.assertEqual(_channels.get_count(rch.id), 0) sch.send_nowait(3) _run_output(interp1, dedent(""" - sch.send_nowait(4, unbound=channels.UNBOUND) + sch.send_nowait(4, unbounditems=channels.UNBOUND) # interp closed here - sch.send_nowait(5, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(6, unbound=channels.UNBOUND) + sch.send_nowait(5, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(6, unbounditems=channels.UNBOUND) """)) _run_output(interp2, dedent(""" - sch.send_nowait(7, unbound=channels.UNBOUND_ERROR) + sch.send_nowait(7, unbounditems=channels.UNBOUND_ERROR) # interp closed here - sch.send_nowait(obj1, unbound=channels.UNBOUND_ERROR) - sch.send_nowait(obj2, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(8, unbound=channels.UNBOUND) + sch.send_nowait(obj1, unbounditems=channels.UNBOUND_ERROR) + sch.send_nowait(obj2, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(8, unbounditems=channels.UNBOUND) """)) _run_output(interp1, dedent(""" - sch.send_nowait(9, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(10, unbound=channels.UNBOUND) + sch.send_nowait(9, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(10, unbounditems=channels.UNBOUND) """)) self.assertEqual(_channels.get_count(rch.id), 10) diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index 18f83d097eb360..64a2db1230d023 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -9,6 +9,7 @@ _queues = import_helper.import_module('_interpqueues') from test.support import interpreters from test.support.interpreters import queues, _crossinterp +import test._crossinterp_definitions as defs from .utils import _run_output, TestBase as _TestBase @@ -42,7 +43,7 @@ def test_highlevel_reloaded(self): importlib.reload(queues) def test_create_destroy(self): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.destroy(qid) self.assertEqual(get_num_queues(), 0) with self.assertRaises(queues.QueueNotFoundError): @@ -56,7 +57,7 @@ def test_not_destroyed(self): '-c', dedent(f""" import {_queues.__name__} as _queues - _queues.create(2, 0, {REPLACE}) + _queues.create(2, {REPLACE}, -1) """), ) self.assertEqual(stdout, '') @@ -67,13 +68,13 @@ def test_not_destroyed(self): def test_bind_release(self): with self.subTest('typical'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.release(qid) self.assertEqual(get_num_queues(), 0) with self.subTest('bind too much'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.bind(qid) _queues.release(qid) @@ -81,7 +82,7 @@ def test_bind_release(self): self.assertEqual(get_num_queues(), 0) with self.subTest('nested'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.bind(qid) _queues.release(qid) @@ -89,7 +90,7 @@ def test_bind_release(self): self.assertEqual(get_num_queues(), 0) with self.subTest('release without binding'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) with self.assertRaises(queues.QueueError): _queues.release(qid) @@ -132,13 +133,13 @@ def test_shareable(self): with self.subTest('same interpreter'): queue2 = queues.create() - queue1.put(queue2, syncobj=True) + queue1.put(queue2) queue3 = queue1.get() self.assertIs(queue3, queue2) with self.subTest('from current interpreter'): queue4 = queues.create() - queue1.put(queue4, syncobj=True) + queue1.put(queue4) out = _run_output(interp, dedent(""" queue4 = queue1.get() print(queue4.id) @@ -149,7 +150,7 @@ def test_shareable(self): with self.subTest('from subinterpreter'): out = _run_output(interp, dedent(""" queue5 = queues.create() - queue1.put(queue5, syncobj=True) + queue1.put(queue5) print(queue5.id) """)) qid = int(out) @@ -198,7 +199,7 @@ class TestQueueOps(TestBase): def test_empty(self): queue = queues.create() before = queue.empty() - queue.put(None, syncobj=True) + queue.put(None) during = queue.empty() queue.get() after = queue.empty() @@ -213,7 +214,7 @@ def test_full(self): queue = queues.create(3) for _ in range(3): actual.append(queue.full()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.full()) for _ in range(3): queue.get() @@ -227,16 +228,16 @@ def test_qsize(self): queue = queues.create() for _ in range(3): actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) queue.get() actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) for _ in range(3): queue.get() actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) queue.get() actual.append(queue.qsize()) @@ -245,70 +246,32 @@ def test_qsize(self): def test_put_get_main(self): expected = list(range(20)) - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create() - for i in range(20): - queue.put(i, **kwds) - actual = [queue.get() for _ in range(20)] + queue = queues.create() + for i in range(20): + queue.put(i) + actual = [queue.get() for _ in range(20)] - self.assertEqual(actual, expected) + self.assertEqual(actual, expected) def test_put_timeout(self): - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create(2) - queue.put(None, **kwds) - queue.put(None, **kwds) - with self.assertRaises(queues.QueueFull): - queue.put(None, timeout=0.1, **kwds) - queue.get() - queue.put(None, **kwds) + queue = queues.create(2) + queue.put(None) + queue.put(None) + with self.assertRaises(queues.QueueFull): + queue.put(None, timeout=0.1) + queue.get() + queue.put(None) def test_put_nowait(self): - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create(2) - queue.put_nowait(None, **kwds) - queue.put_nowait(None, **kwds) - with self.assertRaises(queues.QueueFull): - queue.put_nowait(None, **kwds) - queue.get() - queue.put_nowait(None, **kwds) - - def test_put_syncobj(self): - for obj in [ - None, - True, - 10, - 'spam', - b'spam', - (0, 'a'), - ]: - with self.subTest(repr(obj)): - queue = queues.create() - - queue.put(obj, syncobj=True) - obj2 = queue.get() - self.assertEqual(obj2, obj) - - queue.put(obj, syncobj=True) - obj2 = queue.get_nowait() - self.assertEqual(obj2, obj) - - for obj in [ - [1, 2, 3], - {'a': 13, 'b': 17}, - ]: - with self.subTest(repr(obj)): - queue = queues.create() - with self.assertRaises(interpreters.NotShareableError): - queue.put(obj, syncobj=True) + queue = queues.create(2) + queue.put_nowait(None) + queue.put_nowait(None) + with self.assertRaises(queues.QueueFull): + queue.put_nowait(None) + queue.get() + queue.put_nowait(None) - def test_put_not_syncobj(self): + def test_put_full_fallback(self): for obj in [ None, True, @@ -323,11 +286,11 @@ def test_put_not_syncobj(self): with self.subTest(repr(obj)): queue = queues.create() - queue.put(obj, syncobj=False) + queue.put(obj) obj2 = queue.get() self.assertEqual(obj2, obj) - queue.put(obj, syncobj=False) + queue.put(obj) obj2 = queue.get_nowait() self.assertEqual(obj2, obj) @@ -341,24 +304,9 @@ def test_get_nowait(self): with self.assertRaises(queues.QueueEmpty): queue.get_nowait() - def test_put_get_default_syncobj(self): + def test_put_get_full_fallback(self): expected = list(range(20)) - queue = queues.create(syncobj=True) - for methname in ('get', 'get_nowait'): - with self.subTest(f'{methname}()'): - get = getattr(queue, methname) - for i in range(20): - queue.put(i) - actual = [get() for _ in range(20)] - self.assertEqual(actual, expected) - - obj = [1, 2, 3] # lists are not shareable - with self.assertRaises(interpreters.NotShareableError): - queue.put(obj) - - def test_put_get_default_not_syncobj(self): - expected = list(range(20)) - queue = queues.create(syncobj=False) + queue = queues.create() for methname in ('get', 'get_nowait'): with self.subTest(f'{methname}()'): get = getattr(queue, methname) @@ -384,7 +332,7 @@ def test_put_get_same_interpreter(self): with self.subTest(f'{methname}()'): interp.exec(dedent(f""" orig = b'spam' - queue.put(orig, syncobj=True) + queue.put(orig) obj = queue.{methname}() assert obj == orig, 'expected: obj == orig' assert obj is not orig, 'expected: obj is not orig' @@ -399,7 +347,7 @@ def test_put_get_different_interpreters(self): for methname in ('get', 'get_nowait'): with self.subTest(f'{methname}()'): obj1 = b'spam' - queue1.put(obj1, syncobj=True) + queue1.put(obj1) out = _run_output( interp, @@ -416,7 +364,7 @@ def test_put_get_different_interpreters(self): obj2 = b'eggs' print(id(obj2)) assert queue2.qsize() == 0, 'expected: queue2.qsize() == 0' - queue2.put(obj2, syncobj=True) + queue2.put(obj2) assert queue2.qsize() == 1, 'expected: queue2.qsize() == 1' """)) self.assertEqual(len(queues.list_all()), 2) @@ -433,11 +381,11 @@ def common(queue, unbound=None, presize=0): if not unbound: extraargs = '' elif unbound is queues.UNBOUND: - extraargs = ', unbound=queues.UNBOUND' + extraargs = ', unbounditems=queues.UNBOUND' elif unbound is queues.UNBOUND_ERROR: - extraargs = ', unbound=queues.UNBOUND_ERROR' + extraargs = ', unbounditems=queues.UNBOUND_ERROR' elif unbound is queues.UNBOUND_REMOVE: - extraargs = ', unbound=queues.UNBOUND_REMOVE' + extraargs = ', unbounditems=queues.UNBOUND_REMOVE' else: raise NotImplementedError(repr(unbound)) interp = interpreters.create() @@ -447,8 +395,8 @@ def common(queue, unbound=None, presize=0): queue = queues.Queue({queue.id}) obj1 = b'spam' obj2 = b'eggs' - queue.put(obj1, syncobj=True{extraargs}) - queue.put(obj2, syncobj=True{extraargs}) + queue.put(obj1{extraargs}) + queue.put(obj2{extraargs}) """)) self.assertEqual(queue.qsize(), presize + 2) @@ -501,11 +449,11 @@ def common(queue, unbound=None, presize=0): with self.assertRaises(queues.QueueEmpty): queue.get_nowait() - queue.put(b'ham', unbound=queues.UNBOUND_REMOVE) + queue.put(b'ham', unbounditems=queues.UNBOUND_REMOVE) self.assertEqual(queue.qsize(), 1) interp = common(queue, queues.UNBOUND_REMOVE, 1) self.assertEqual(queue.qsize(), 3) - queue.put(42, unbound=queues.UNBOUND_REMOVE) + queue.put(42, unbounditems=queues.UNBOUND_REMOVE) self.assertEqual(queue.qsize(), 4) del interp self.assertEqual(queue.qsize(), 2) @@ -523,11 +471,11 @@ def test_put_cleared_with_subinterpreter_mixed(self): _run_output(interp, dedent(f""" from test.support.interpreters import queues queue = queues.Queue({queue.id}) - queue.put(1, syncobj=True, unbound=queues.UNBOUND) - queue.put(2, syncobj=True, unbound=queues.UNBOUND_ERROR) - queue.put(3, syncobj=True) - queue.put(4, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(5, syncobj=True, unbound=queues.UNBOUND) + queue.put(1, unbounditems=queues.UNBOUND) + queue.put(2, unbounditems=queues.UNBOUND_ERROR) + queue.put(3) + queue.put(4, unbounditems=queues.UNBOUND_REMOVE) + queue.put(5, unbounditems=queues.UNBOUND) """)) self.assertEqual(queue.qsize(), 5) @@ -555,13 +503,13 @@ def test_put_cleared_with_subinterpreter_multiple(self): interp1 = interpreters.create() interp2 = interpreters.create() - queue.put(1, syncobj=True) + queue.put(1) _run_output(interp1, dedent(f""" from test.support.interpreters import queues queue = queues.Queue({queue.id}) obj1 = queue.get() - queue.put(2, syncobj=True, unbound=queues.UNBOUND) - queue.put(obj1, syncobj=True, unbound=queues.UNBOUND_REMOVE) + queue.put(2, unbounditems=queues.UNBOUND) + queue.put(obj1, unbounditems=queues.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" from test.support.interpreters import queues @@ -572,21 +520,21 @@ def test_put_cleared_with_subinterpreter_multiple(self): self.assertEqual(queue.qsize(), 0) queue.put(3) _run_output(interp1, dedent(""" - queue.put(4, syncobj=True, unbound=queues.UNBOUND) + queue.put(4, unbounditems=queues.UNBOUND) # interp closed here - queue.put(5, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(6, syncobj=True, unbound=queues.UNBOUND) + queue.put(5, unbounditems=queues.UNBOUND_REMOVE) + queue.put(6, unbounditems=queues.UNBOUND) """)) _run_output(interp2, dedent(""" - queue.put(7, syncobj=True, unbound=queues.UNBOUND_ERROR) + queue.put(7, unbounditems=queues.UNBOUND_ERROR) # interp closed here - queue.put(obj1, syncobj=True, unbound=queues.UNBOUND_ERROR) - queue.put(obj2, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(8, syncobj=True, unbound=queues.UNBOUND) + queue.put(obj1, unbounditems=queues.UNBOUND_ERROR) + queue.put(obj2, unbounditems=queues.UNBOUND_REMOVE) + queue.put(8, unbounditems=queues.UNBOUND) """)) _run_output(interp1, dedent(""" - queue.put(9, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(10, syncobj=True, unbound=queues.UNBOUND) + queue.put(9, unbounditems=queues.UNBOUND_REMOVE) + queue.put(10, unbounditems=queues.UNBOUND) """)) self.assertEqual(queue.qsize(), 10) @@ -642,12 +590,12 @@ def f(): break except queues.QueueEmpty: continue - queue2.put(obj, syncobj=True) + queue2.put(obj) t = threading.Thread(target=f) t.start() orig = b'spam' - queue1.put(orig, syncobj=True) + queue1.put(orig) obj = queue2.get() t.join() diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index f9fa1dab291056..0ab553190001bd 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -20,9 +20,11 @@ #endif #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -523,7 +525,7 @@ typedef struct _channelitem { int64_t interpid; _PyXIData_t *data; _waiting_t *waiting; - int unboundop; + unboundop_t unboundop; struct _channelitem *next; } _channelitem; @@ -536,7 +538,7 @@ _channelitem_ID(_channelitem *item) static void _channelitem_init(_channelitem *item, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -583,7 +585,7 @@ _channelitem_clear(_channelitem *item) static _channelitem * _channelitem_new(int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = GLOBAL_MALLOC(_channelitem); if (item == NULL) { @@ -694,7 +696,7 @@ _channelqueue_free(_channelqueue *queue) static int _channelqueue_put(_channelqueue *queue, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = _channelitem_new(interpid, data, waiting, unboundop); if (item == NULL) { @@ -798,7 +800,7 @@ _channelqueue_remove(_channelqueue *queue, _channelitem_id_t itemid, } queue->count -= 1; - int unboundop; + unboundop_t unboundop; _channelitem_popped(item, p_data, p_waiting, &unboundop); } @@ -1083,16 +1085,18 @@ typedef struct _channel { PyThread_type_lock mutex; _channelqueue *queue; _channelends *ends; - struct { - int unboundop; + struct _channeldefaults { + unboundop_t unboundop; + xidata_fallback_t fallback; } defaults; int open; struct _channel_closing *closing; } _channel_state; static _channel_state * -_channel_new(PyThread_type_lock mutex, int unboundop) +_channel_new(PyThread_type_lock mutex, struct _channeldefaults defaults) { + assert(check_unbound(defaults.unboundop)); _channel_state *chan = GLOBAL_MALLOC(_channel_state); if (chan == NULL) { return NULL; @@ -1109,7 +1113,7 @@ _channel_new(PyThread_type_lock mutex, int unboundop) GLOBAL_FREE(chan); return NULL; } - chan->defaults.unboundop = unboundop; + chan->defaults = defaults; chan->open = 1; chan->closing = NULL; return chan; @@ -1130,7 +1134,7 @@ _channel_free(_channel_state *chan) static int _channel_add(_channel_state *chan, int64_t interpid, - _PyXIData_t *data, _waiting_t *waiting, int unboundop) + _PyXIData_t *data, _waiting_t *waiting, unboundop_t unboundop) { int res = -1; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1611,7 +1615,7 @@ _channels_release_cid_object(_channels *channels, int64_t cid) struct channel_id_and_info { int64_t id; - int unboundop; + struct _channeldefaults defaults; }; static struct channel_id_and_info * @@ -1628,7 +1632,7 @@ _channels_list_all(_channels *channels, int64_t *count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i] = (struct channel_id_and_info){ .id = ref->cid, - .unboundop = ref->chan->defaults.unboundop, + .defaults = ref->chan->defaults, }; } *count = channels->numopen; @@ -1714,13 +1718,13 @@ _channel_finish_closing(_channel_state *chan) { // Create a new channel. static int64_t -channel_create(_channels *channels, int unboundop) +channel_create(_channels *channels, struct _channeldefaults defaults) { PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_CHANNEL_MUTEX_INIT; } - _channel_state *chan = _channel_new(mutex, unboundop); + _channel_state *chan = _channel_new(mutex, defaults); if (chan == NULL) { PyThread_free_lock(mutex); return -1; @@ -1752,7 +1756,7 @@ channel_destroy(_channels *channels, int64_t cid) // Optionally request to be notified when it is received. static int channel_send(_channels *channels, int64_t cid, PyObject *obj, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop, xidata_fallback_t fallback) { PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; @@ -1779,7 +1783,7 @@ channel_send(_channels *channels, int64_t cid, PyObject *obj, PyThread_release_lock(mutex); return -1; } - if (_PyObject_GetXIDataNoFallback(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, data) != 0) { PyThread_release_lock(mutex); GLOBAL_FREE(data); return -1; @@ -1823,7 +1827,8 @@ channel_clear_sent(_channels *channels, int64_t cid, _waiting_t *waiting) // Like channel_send(), but strictly wait for the object to be received. static int channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, - int unboundop, PY_TIMEOUT_T timeout) + unboundop_t unboundop, PY_TIMEOUT_T timeout, + xidata_fallback_t fallback) { // We use a stack variable here, so we must ensure that &waiting // is not held by any channel item at the point this function exits. @@ -1834,7 +1839,7 @@ channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, } /* Queue up the object. */ - int res = channel_send(channels, cid, obj, &waiting, unboundop); + int res = channel_send(channels, cid, obj, &waiting, unboundop, fallback); if (res < 0) { assert(waiting.status == WAITING_NO_STATUS); goto finally; @@ -2005,6 +2010,20 @@ channel_is_associated(_channels *channels, int64_t cid, int64_t interpid, return (end != NULL && end->open); } +static int +channel_get_defaults(_channels *channels, int64_t cid, struct _channeldefaults *defaults) +{ + PyThread_type_lock mutex = NULL; + _channel_state *channel = NULL; + int err = _channels_lookup(channels, cid, &mutex, &channel); + if (err != 0) { + return err; + } + *defaults = channel->defaults; + PyThread_release_lock(mutex); + return 0; +} + static int _channel_get_count(_channels *channels, int64_t cid, Py_ssize_t *p_count) { @@ -2881,20 +2900,27 @@ clear_interpreter(void *data) static PyObject * channelsmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"unboundop", NULL}; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "i:create", kwlist, - &unboundop)) + static char *kwlist[] = {"unboundop", "fallback", NULL}; + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:create", kwlist, + &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _channeldefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t cid = channel_create(&_globals.channels, unboundop); + int64_t cid = channel_create(&_globals.channels, defaults); if (cid < 0) { (void)handle_channel_error(-1, self, cid); return NULL; @@ -2987,7 +3013,9 @@ channelsmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } assert(cidobj != NULL); - PyObject *item = Py_BuildValue("Oi", cidobj, cur->unboundop); + PyObject *item = Py_BuildValue("Oii", cidobj, + cur->defaults.unboundop, + cur->defaults.fallback); Py_DECREF(cidobj); if (item == NULL) { Py_SETREF(ids, NULL); @@ -3075,40 +3103,54 @@ receive end."); static PyObject * channelsmod_send(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; + int unboundarg = -1; + int fallbackarg = -1; int blocking = 1; PyObject *timeout_obj = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|i$pO:channel_send", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O|ii$pO:channel_send", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); - return NULL; - } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } /* Queue up the object. */ int err = 0; if (blocking) { - err = channel_send_wait(&_globals.channels, cid, obj, unboundop, timeout); + err = channel_send_wait( + &_globals.channels, cid, obj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, obj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, obj, NULL, unboundop, fallback); } if (handle_channel_error(err, self, cid)) { return NULL; @@ -3126,32 +3168,44 @@ By default this waits for the object to be received."); static PyObject * channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; - int blocking = 1; + int unboundarg = -1; + int fallbackarg = -1; + int blocking = -1; PyObject *timeout_obj = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O&O|i$pO:channel_send_buffer", kwlist, + "O&O|ii$pO:channel_send_buffer", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) { - return NULL; - } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) + { return NULL; } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } PyObject *tempobj = PyMemoryView_FromObject(obj); if (tempobj == NULL) { @@ -3162,10 +3216,11 @@ channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) int err = 0; if (blocking) { err = channel_send_wait( - &_globals.channels, cid, tempobj, unboundop, timeout); + &_globals.channels, cid, tempobj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, tempobj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, tempobj, NULL, unboundop, fallback); } Py_DECREF(tempobj); if (handle_channel_error(err, self, cid)) { @@ -3197,7 +3252,7 @@ channelsmod_recv(PyObject *self, PyObject *args, PyObject *kwds) cid = cid_data.cid; PyObject *obj = NULL; - int unboundop = 0; + unboundop_t unboundop = 0; int err = channel_recv(&_globals.channels, cid, &obj, &unboundop); if (err == ERR_CHANNEL_EMPTY && dflt != NULL) { // Use the default. @@ -3388,17 +3443,14 @@ channelsmod_get_channel_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t cid = cid_data.cid; - PyThread_type_lock mutex = NULL; - _channel_state *channel = NULL; - int err = _channels_lookup(&_globals.channels, cid, &mutex, &channel); + struct _channeldefaults defaults; + int err = channel_get_defaults(&_globals.channels, cid, &defaults); if (handle_channel_error(err, self, cid)) { return NULL; } - int unboundop = channel->defaults.unboundop; - PyThread_release_lock(mutex); - PyObject *defaults = Py_BuildValue("i", unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(channelsmod_get_channel_defaults_doc, diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 209fcdfd0cd01e..816285c9eff44a 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -9,9 +9,11 @@ #include "pycore_crossinterp.h" // _PyXIData_t #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -401,14 +403,13 @@ typedef struct _queueitem { meaning the interpreter has been destroyed. */ int64_t interpid; _PyXIData_t *data; - int fmt; - int unboundop; + unboundop_t unboundop; struct _queueitem *next; } _queueitem; static void _queueitem_init(_queueitem *item, - int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) + int64_t interpid, _PyXIData_t *data, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -422,7 +423,6 @@ _queueitem_init(_queueitem *item, *item = (_queueitem){ .interpid = interpid, .data = data, - .fmt = fmt, .unboundop = unboundop, }; } @@ -446,14 +446,14 @@ _queueitem_clear(_queueitem *item) } static _queueitem * -_queueitem_new(int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) +_queueitem_new(int64_t interpid, _PyXIData_t *data, int unboundop) { _queueitem *item = GLOBAL_MALLOC(_queueitem); if (item == NULL) { PyErr_NoMemory(); return NULL; } - _queueitem_init(item, interpid, data, fmt, unboundop); + _queueitem_init(item, interpid, data, unboundop); return item; } @@ -476,10 +476,9 @@ _queueitem_free_all(_queueitem *item) static void _queueitem_popped(_queueitem *item, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) + _PyXIData_t **p_data, unboundop_t *p_unboundop) { *p_data = item->data; - *p_fmt = item->fmt; *p_unboundop = item->unboundop; // We clear them here, so they won't be released in _queueitem_clear(). item->data = NULL; @@ -527,16 +526,16 @@ typedef struct _queue { _queueitem *first; _queueitem *last; } items; - struct { - int fmt; + struct _queuedefaults { + xidata_fallback_t fallback; int unboundop; } defaults; } _queue; static int -_queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) +_queue_init(_queue *queue, Py_ssize_t maxsize, struct _queuedefaults defaults) { - assert(check_unbound(unboundop)); + assert(check_unbound(defaults.unboundop)); PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_QUEUE_ALLOC; @@ -547,10 +546,7 @@ _queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) .items = { .maxsize = maxsize, }, - .defaults = { - .fmt = fmt, - .unboundop = unboundop, - }, + .defaults = defaults, }; return 0; } @@ -631,8 +627,7 @@ _queue_unlock(_queue *queue) } static int -_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, - int fmt, int unboundop) +_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, int unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -648,7 +643,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, return ERR_QUEUE_FULL; } - _queueitem *item = _queueitem_new(interpid, data, fmt, unboundop); + _queueitem *item = _queueitem_new(interpid, data, unboundop); if (item == NULL) { _queue_unlock(queue); return -1; @@ -668,8 +663,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, } static int -_queue_next(_queue *queue, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) +_queue_next(_queue *queue, _PyXIData_t **p_data, int *p_unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -688,7 +682,7 @@ _queue_next(_queue *queue, } queue->items.count -= 1; - _queueitem_popped(item, p_data, p_fmt, p_unboundop); + _queueitem_popped(item, p_data, p_unboundop); _queue_unlock(queue); return 0; @@ -1035,8 +1029,7 @@ _queues_decref(_queues *queues, int64_t qid) struct queue_id_and_info { int64_t id; - int fmt; - int unboundop; + struct _queuedefaults defaults; }; static struct queue_id_and_info * @@ -1053,8 +1046,7 @@ _queues_list_all(_queues *queues, int64_t *p_count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i].id = ref->qid; assert(ref->queue != NULL); - ids[i].fmt = ref->queue->defaults.fmt; - ids[i].unboundop = ref->queue->defaults.unboundop; + ids[i].defaults = ref->queue->defaults; } *p_count = queues->count; @@ -1090,13 +1082,14 @@ _queue_free(_queue *queue) // Create a new queue. static int64_t -queue_create(_queues *queues, Py_ssize_t maxsize, int fmt, int unboundop) +queue_create(_queues *queues, Py_ssize_t maxsize, + struct _queuedefaults defaults) { _queue *queue = GLOBAL_MALLOC(_queue); if (queue == NULL) { return ERR_QUEUE_ALLOC; } - int err = _queue_init(queue, maxsize, fmt, unboundop); + int err = _queue_init(queue, maxsize, defaults); if (err < 0) { GLOBAL_FREE(queue); return (int64_t)err; @@ -1125,7 +1118,8 @@ queue_destroy(_queues *queues, int64_t qid) // Push an object onto the queue. static int -queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) +queue_put(_queues *queues, int64_t qid, PyObject *obj, unboundop_t unboundop, + xidata_fallback_t fallback) { PyThreadState *tstate = PyThreadState_Get(); @@ -1138,27 +1132,27 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) assert(queue != NULL); // Convert the object to cross-interpreter data. - _PyXIData_t *data = _PyXIData_New(); - if (data == NULL) { + _PyXIData_t *xidata = _PyXIData_New(); + if (xidata == NULL) { _queue_unmark_waiter(queue, queues->mutex); return -1; } - if (_PyObject_GetXIDataNoFallback(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) { _queue_unmark_waiter(queue, queues->mutex); - GLOBAL_FREE(data); + GLOBAL_FREE(xidata); return -1; } - assert(_PyXIData_INTERPID(data) == + assert(_PyXIData_INTERPID(xidata) == PyInterpreterState_GetID(tstate->interp)); // Add the data to the queue. int64_t interpid = -1; // _queueitem_init() will set it. - int res = _queue_add(queue, interpid, data, fmt, unboundop); + int res = _queue_add(queue, interpid, xidata, unboundop); _queue_unmark_waiter(queue, queues->mutex); if (res != 0) { // We may chain an exception here: - (void)_release_xid_data(data, 0); - GLOBAL_FREE(data); + (void)_release_xid_data(xidata, 0); + GLOBAL_FREE(xidata); return res; } @@ -1169,7 +1163,7 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) // XXX Support a "wait" mutex? static int queue_get(_queues *queues, int64_t qid, - PyObject **res, int *p_fmt, int *p_unboundop) + PyObject **res, int *p_unboundop) { int err; *res = NULL; @@ -1185,7 +1179,7 @@ queue_get(_queues *queues, int64_t qid, // Pop off the next item from the queue. _PyXIData_t *data = NULL; - err = _queue_next(queue, &data, p_fmt, p_unboundop); + err = _queue_next(queue, &data, p_unboundop); _queue_unmark_waiter(queue, queues->mutex); if (err != 0) { return err; @@ -1216,6 +1210,20 @@ queue_get(_queues *queues, int64_t qid, return 0; } +static int +queue_get_defaults(_queues *queues, int64_t qid, + struct _queuedefaults *p_defaults) +{ + _queue *queue = NULL; + int err = _queues_lookup(queues, qid, &queue); + if (err != 0) { + return err; + } + *p_defaults = queue->defaults; + _queue_unmark_waiter(queue, queues->mutex); + return 0; +} + static int queue_get_maxsize(_queues *queues, int64_t qid, Py_ssize_t *p_maxsize) { @@ -1474,22 +1482,28 @@ qidarg_converter(PyObject *arg, void *ptr) static PyObject * queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"maxsize", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"maxsize", "unboundop", "fallback", NULL}; Py_ssize_t maxsize; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "nii:create", kwlist, - &maxsize, &fmt, &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "n|ii:create", kwlist, + &maxsize, &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t qid = queue_create(&_globals.queues, maxsize, fmt, unboundop); + int64_t qid = queue_create(&_globals.queues, maxsize, defaults); if (qid < 0) { (void)handle_queue_error((int)qid, self, qid); return NULL; @@ -1511,7 +1525,7 @@ queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_create_doc, -"create(maxsize, fmt, unboundop) -> qid\n\ +"create(maxsize, unboundop, fallback) -> qid\n\ \n\ Create a new cross-interpreter queue and return its unique generated ID.\n\ It is a new reference as though bind() had been called on the queue.\n\ @@ -1560,8 +1574,9 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } struct queue_id_and_info *cur = qids; for (int64_t i=0; i < count; cur++, i++) { - PyObject *item = Py_BuildValue("Lii", cur->id, cur->fmt, - cur->unboundop); + PyObject *item = Py_BuildValue("Lii", cur->id, + cur->defaults.unboundop, + cur->defaults.fallback); if (item == NULL) { Py_SETREF(ids, NULL); break; @@ -1575,34 +1590,44 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(queuesmod_list_all_doc, -"list_all() -> [(qid, fmt)]\n\ +"list_all() -> [(qid, unboundop, fallback)]\n\ \n\ Return the list of IDs for all queues.\n\ -Each corresponding default format is also included."); +Each corresponding default unbound op and fallback is also included."); static PyObject * queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"qid", "obj", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"qid", "obj", "unboundop", "fallback", NULL}; qidarg_converter_data qidarg = {0}; PyObject *obj; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&Oii:put", kwlist, - qidarg_converter, &qidarg, &obj, &fmt, - &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|ii$p:put", kwlist, + qidarg_converter, &qidarg, &obj, + &unboundarg, &fallbackarg)) { return NULL; } int64_t qid = qidarg.id; - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = queue_get_defaults(&_globals.queues, qid, &defaults); + if (handle_queue_error(err, self, qid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { return NULL; } /* Queue up the object. */ - int err = queue_put(&_globals.queues, qid, obj, fmt, unboundop); + int err = queue_put(&_globals.queues, qid, obj, unboundop, fallback); // This is the only place that raises QueueFull. if (handle_queue_error(err, self, qid)) { return NULL; @@ -1612,7 +1637,7 @@ queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_put_doc, -"put(qid, obj, fmt)\n\ +"put(qid, obj)\n\ \n\ Add the object's data to the queue."); @@ -1628,27 +1653,26 @@ queuesmod_get(PyObject *self, PyObject *args, PyObject *kwds) int64_t qid = qidarg.id; PyObject *obj = NULL; - int fmt = 0; int unboundop = 0; - int err = queue_get(&_globals.queues, qid, &obj, &fmt, &unboundop); + int err = queue_get(&_globals.queues, qid, &obj, &unboundop); // This is the only place that raises QueueEmpty. if (handle_queue_error(err, self, qid)) { return NULL; } if (obj == NULL) { - return Py_BuildValue("Oii", Py_None, fmt, unboundop); + return Py_BuildValue("Oi", Py_None, unboundop); } - PyObject *res = Py_BuildValue("OiO", obj, fmt, Py_None); + PyObject *res = Py_BuildValue("OO", obj, Py_None); Py_DECREF(obj); return res; } PyDoc_STRVAR(queuesmod_get_doc, -"get(qid) -> (obj, fmt)\n\ +"get(qid) -> (obj, unboundop)\n\ \n\ Return a new object from the data at the front of the queue.\n\ -The object's format is also returned.\n\ +The unbound op is also returned.\n\ \n\ If there is nothing to receive then raise QueueEmpty."); @@ -1748,17 +1772,14 @@ queuesmod_get_queue_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t qid = qidarg.id; - _queue *queue = NULL; - int err = _queues_lookup(&_globals.queues, qid, &queue); + struct _queuedefaults defaults; + int err = queue_get_defaults(&_globals.queues, qid, &defaults); if (handle_queue_error(err, self, qid)) { return NULL; } - int fmt = queue->defaults.fmt; - int unboundop = queue->defaults.unboundop; - _queue_unmark_waiter(queue, _globals.queues.mutex); - PyObject *defaults = Py_BuildValue("ii", fmt, unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(queuesmod_get_queue_defaults_doc, diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index d73cbca36359c7..40fd51d752e324 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -39,10 +39,37 @@ _get_interpid(_PyXIData_t *data) } +#ifdef HAS_FALLBACK +static int +resolve_fallback(int arg, xidata_fallback_t dflt, + xidata_fallback_t *p_fallback) +{ + if (arg < 0) { + *p_fallback = dflt; + return 0; + } + xidata_fallback_t fallback; + if (arg == _PyXIDATA_XIDATA_ONLY) { + fallback =_PyXIDATA_XIDATA_ONLY; + } + else if (arg == _PyXIDATA_FULL_FALLBACK) { + fallback = _PyXIDATA_FULL_FALLBACK; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported fallback %d", arg); + return -1; + } + *p_fallback = fallback; + return 0; +} +#endif + + /* unbound items ************************************************************/ #ifdef HAS_UNBOUND_ITEMS +typedef int unboundop_t; #define UNBOUND_REMOVE 1 #define UNBOUND_ERROR 2 #define UNBOUND_REPLACE 3 @@ -53,6 +80,7 @@ _get_interpid(_PyXIData_t *data) // object is released but the underlying data is copied (with the "raw" // allocator) and used when the item is popped off the queue. +#ifndef NDEBUG static int check_unbound(int unboundop) { @@ -65,5 +93,31 @@ check_unbound(int unboundop) return 0; } } +#endif + +static int +resolve_unboundop(int arg, unboundop_t dflt, unboundop_t *p_unboundop) +{ + if (arg < 0) { + *p_unboundop = dflt; + return 0; + } + unboundop_t op; + if (arg == UNBOUND_REMOVE) { + op = UNBOUND_REMOVE; + } + else if (arg == UNBOUND_ERROR) { + op = UNBOUND_ERROR; + } + else if (arg == UNBOUND_REPLACE) { + op = UNBOUND_REPLACE; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported unboundop %d", arg); + return -1; + } + *p_unboundop = op; + return 0; +} #endif diff --git a/Python/crossinterp.c b/Python/crossinterp.c index dc67de4a40849d..6681b969183925 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -1839,6 +1839,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) return -1; } PyThreadState *tstate = PyThreadState_Get(); + // XXX Use _PyObject_GetXIDataWithFallback()? if (_PyObject_GetXIDataNoFallback(tstate, value, item->xidata) != 0) { PyMem_RawFree(item->xidata); item->xidata = NULL; From bd4046f4f869039a1a2ebe2d1d18bfbc2a2951b6 Mon Sep 17 00:00:00 2001 From: Alex Kautz Date: Thu, 22 May 2025 09:46:29 -0400 Subject: [PATCH 292/989] gh-134370: Added clarification on instance annotations (#134387) Instances of classes cannot have annotations, however sometimes they will erroneously have the __annotations__ attribute --- Doc/howto/annotations.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Doc/howto/annotations.rst b/Doc/howto/annotations.rst index 78f3704ba5d000..d7deb6c6bc1768 100644 --- a/Doc/howto/annotations.rst +++ b/Doc/howto/annotations.rst @@ -248,4 +248,9 @@ quirks by using :func:`annotationlib.get_annotations` on Python 3.14+ or :func:`inspect.get_annotations` on Python 3.10+. On earlier versions of Python, you can avoid these bugs by accessing the annotations from the class's :attr:`~type.__dict__` -(e.g., ``cls.__dict__.get('__annotations__', None)``). +(for example, ``cls.__dict__.get('__annotations__', None)``). + +In some versions of Python, instances of classes may have an ``__annotations__`` +attribute. However, this is not supported functionality. If you need the +annotations of an instance, you can use :func:`type` to access its class +(for example, ``annotationlib.get_annotations(type(myinstance))`` on Python 3.14+). From 3effede97cc13fc0c5ab5dcde26cc319f388e84c Mon Sep 17 00:00:00 2001 From: Duprat Date: Thu, 22 May 2025 15:48:24 +0200 Subject: [PATCH 293/989] gh-134323: Fix the new `threading.RLock.locked` method (#134368) Co-authored-by: Kumar Aditya --- Lib/test/lock_tests.py | 18 ++++++++++++++++++ Lib/threading.py | 4 ++-- ...5-05-20-19-16-30.gh-issue-134323.ZQZGvw.rst | 1 + Modules/_threadmodule.c | 10 ++++++++-- 4 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-20-19-16-30.gh-issue-134323.ZQZGvw.rst diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py index d64b2b9fe28694..850450c1e81a16 100644 --- a/Lib/test/lock_tests.py +++ b/Lib/test/lock_tests.py @@ -370,6 +370,24 @@ def test_locked(self): lock.release() self.assertFalse(lock.locked()) + def test_locked_with_2threads(self): + # see gh-134323: check that a rlock which + # is acquired in a different thread, + # is still locked in the main thread. + result = [] + rlock = self.locktype() + self.assertFalse(rlock.locked()) + def acquire(): + result.append(rlock.locked()) + rlock.acquire() + result.append(rlock.locked()) + + with Bunch(acquire, 1): + pass + self.assertTrue(rlock.locked()) + self.assertFalse(result[0]) + self.assertTrue(result[1]) + def test_release_save_unacquired(self): # Cannot _release_save an unacquired lock lock = self.locktype() diff --git a/Lib/threading.py b/Lib/threading.py index fb9ef75d28111b..b6c451d1fbaabd 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -158,7 +158,7 @@ def __repr__(self): except KeyError: pass return "<%s %s.%s object owner=%r count=%d at %s>" % ( - "locked" if self._block.locked() else "unlocked", + "locked" if self.locked() else "unlocked", self.__class__.__module__, self.__class__.__qualname__, owner, @@ -237,7 +237,7 @@ def __exit__(self, t, v, tb): def locked(self): """Return whether this object is locked.""" - return self._count > 0 + return self._block.locked() # Internal methods used by condition variables diff --git a/Misc/NEWS.d/next/Library/2025-05-20-19-16-30.gh-issue-134323.ZQZGvw.rst b/Misc/NEWS.d/next/Library/2025-05-20-19-16-30.gh-issue-134323.ZQZGvw.rst new file mode 100644 index 00000000000000..7982b52f77a172 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-20-19-16-30.gh-issue-134323.ZQZGvw.rst @@ -0,0 +1 @@ +Fix the :meth:`threading.RLock.locked` method. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 77286ed2a74669..10123700f90f32 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1022,6 +1022,11 @@ rlock_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +static int +rlock_locked_impl(rlockobject *self) +{ + return PyMutex_IsLocked(&self->lock.mutex); +} static void rlock_dealloc(PyObject *self) @@ -1111,7 +1116,7 @@ static PyObject * rlock_locked(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = rlockobject_CAST(op); - int is_locked = _PyRecursiveMutex_IsLockedByCurrentThread(&self->lock); + int is_locked = rlock_locked_impl(self); return PyBool_FromLong(is_locked); } @@ -1219,10 +1224,11 @@ rlock_repr(PyObject *op) { rlockobject *self = rlockobject_CAST(op); PyThread_ident_t owner = self->lock.thread; + int locked = rlock_locked_impl(self); size_t count = self->lock.level + 1; return PyUnicode_FromFormat( "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", - owner ? "locked" : "unlocked", + locked ? "locked" : "unlocked", Py_TYPE(self)->tp_name, owner, count, self); } From 8c5e5557c64a8d6310b2a958f576d5eb245cb1b6 Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Thu, 22 May 2025 21:54:57 +0800 Subject: [PATCH 294/989] GH-131798: Turn _LOAD_SMALL_INT into _LOAD_CONST_INLINE_BORROW in the JIT (GH-134406) --- Lib/test/test_capi/test_opt.py | 12 ++++++++++++ .../2025-05-21-13-57-26.gh-issue-131798.QwS5Bb.rst | 1 + Python/optimizer_bytecodes.c | 5 ++++- Python/optimizer_cases.c.h | 5 ++++- 4 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-13-57-26.gh-issue-131798.QwS5Bb.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 98b434313e4d2d..2b777acb1ec4fd 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2156,6 +2156,18 @@ def testfunc(n): self.assertIn("_GUARD_TYPE_VERSION", uops) self.assertNotIn("_CHECK_ATTR_CLASS", uops) + def test_load_small_int(self): + def testfunc(n): + x = 0 + for i in range(n): + x += 1 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_LOAD_SMALL_INT", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-13-57-26.gh-issue-131798.QwS5Bb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-13-57-26.gh-issue-131798.QwS5Bb.rst new file mode 100644 index 00000000000000..f873bbfb4dcb68 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-13-57-26.gh-issue-131798.QwS5Bb.rst @@ -0,0 +1 @@ +JIT: replace ``_LOAD_SMALL_INT`` with ``_LOAD_CONST_INLINE_BORROW`` diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f12cd7b968cffc..b9ebd8678e0f1e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -528,7 +528,10 @@ dummy_func(void) { } op(_LOAD_SMALL_INT, (-- value)) { - PyObject *val = PyLong_FromLong(this_instr->oparg); + PyObject *val = PyLong_FromLong(oparg); + assert(val); + assert(_Py_IsImmortal(val)); + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 602f5e2cfaf77e..0ba45e1f58fe08 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -79,7 +79,10 @@ case _LOAD_SMALL_INT: { JitOptSymbol *value; - PyObject *val = PyLong_FromLong(this_instr->oparg); + PyObject *val = PyLong_FromLong(oparg); + assert(val); + assert(_Py_IsImmortal(val)); + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); stack_pointer[0] = value; stack_pointer += 1; From 899c7dc283cb899fdfd79fb479b38352f48e454a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 22 May 2025 16:05:07 +0200 Subject: [PATCH 295/989] gh-133740: Fix locale.nl_langinfo(ALT_DIGITS) (#134468) Set the LC_CTYPE locale to the LC_TIME locale even if nl_langinfo(ALT_DIGITS) result is ASCII. The result is a list separated by NUL characters and the code only checks the first list item which can be ASCII whereas following items are non-ASCII. Fix test__locale for the uk_UA locale on RHEL 7. Co-authored-by: Serhiy Storchaka --- Modules/_localemodule.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index ad618398d5b824..c1f56008b7c49e 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -692,7 +692,17 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; char *oldloc = NULL; if (langinfo_constants[i].category != LC_CTYPE - && !is_all_ascii(result) + && ( +#ifdef __GLIBC__ + // gh-133740: Always change the locale for ALT_DIGITS and ERA +# ifdef ALT_DIGITS + item == ALT_DIGITS || +# endif +# ifdef ERA + item == ERA || +# endif +#endif + !is_all_ascii(result)) && change_locale(langinfo_constants[i].category, &oldloc) < 0) { return NULL; From 09e72cf091d03479eddcb3c4526f5c6af56d31a0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 May 2025 08:40:33 -0600 Subject: [PATCH 296/989] gh-132775: Use _PyCode GetScriptXIData() (gh-134511) --- Lib/test/support/interpreters/channels.py | 2 +- Lib/test/test__interpreters.py | 25 +- Lib/test/test_interpreters/test_api.py | 23 +- Modules/_interpretersmodule.c | 294 ++++++---------------- 4 files changed, 104 insertions(+), 240 deletions(-) diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/interpreters/channels.py index 7a2bd7d63f808f..3b6e0f0effd969 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/interpreters/channels.py @@ -69,7 +69,7 @@ def list_all(): if not hasattr(send, '_unboundop'): send._set_unbound(unboundop) else: - assert send._unbound[0] == op + assert send._unbound[0] == unboundop channels.append(chan) return channels diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index 63fdaad8de7ef5..ad3ebbfdff64a7 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -474,13 +474,15 @@ def setUp(self): def test_signatures(self): # See https://github.com/python/cpython/issues/126654 - msg = "expected 'shared' to be a dict" + msg = r'_interpreters.exec\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', 1) with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', shared=1) + msg = r'_interpreters.run_string\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_string(self.id, 'a', shared=1) + msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_func(self.id, lambda: None, shared=1) @@ -952,7 +954,8 @@ def test_invalid_syntax(self): """) with self.subTest('script'): - self.assert_run_failed(SyntaxError, script) + with self.assertRaises(SyntaxError): + _interpreters.run_string(self.id, script) with self.subTest('module'): modname = 'spam_spam_spam' @@ -1019,12 +1022,19 @@ def script(): with open(w, 'w', encoding="utf-8") as spipe: with contextlib.redirect_stdout(spipe): print('it worked!', end='') + failed = None def f(): - _interpreters.set___main___attrs(self.id, dict(w=w)) - _interpreters.run_func(self.id, script) + nonlocal failed + try: + _interpreters.set___main___attrs(self.id, dict(w=w)) + _interpreters.run_func(self.id, script) + except Exception as exc: + failed = exc t = threading.Thread(target=f) t.start() t.join() + if failed: + raise Exception from failed with open(r, encoding="utf-8") as outfile: out = outfile.read() @@ -1053,19 +1063,16 @@ def test_closure(self): spam = True def script(): assert spam - - with self.assertRaises(TypeError): + with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) - # XXX This hasn't been fixed yet. - @unittest.expectedFailure def test_return_value(self): def script(): return 'spam' with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) - @unittest.skip("we're not quite there yet") +# @unittest.skip("we're not quite there yet") def test_args(self): with self.subTest('args'): def script(a, b=0): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 1e2d572b1cbb81..165949167ceba8 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -839,9 +839,16 @@ def test_bad_script(self): interp.exec(10) def test_bytes_for_script(self): + r, w = self.pipe() + RAN = b'R' + DONE = b'D' interp = interpreters.create() - with self.assertRaises(TypeError): - interp.exec(b'print("spam")') + interp.exec(f"""if True: + import os + os.write({w}, {RAN!r}) + """) + os.write(w, DONE) + self.assertEqual(os.read(r, 1), RAN) def test_with_background_threads_still_running(self): r_interp, w_interp = self.pipe() @@ -1010,8 +1017,6 @@ def test_call(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1036,6 +1041,8 @@ def test_call(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): with self.assertRaises(Exception): @@ -1051,8 +1058,6 @@ def test_call_in_thread(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1079,6 +1084,8 @@ def test_call_in_thread(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): if args or kwargs: @@ -1618,8 +1625,8 @@ def test_exec(self): def test_call(self): with self.subTest('no args'): interpid = _interpreters.create() - exc = _interpreters.call(interpid, call_func_return_shareable) - self.assertIs(exc, None) + with self.assertRaises(ValueError): + _interpreters.call(interpid, call_func_return_shareable) with self.subTest('uncaught exception'): interpid = _interpreters.create() diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 91cd92806206be..7f84b38a70e127 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -9,7 +9,6 @@ #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() -#include "pycore_function.h" // _PyFunction_VerifyStateless() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -361,81 +360,6 @@ _get_current_xibufferview_type(void) } -/* Python code **************************************************************/ - -static const char * -check_code_str(PyUnicodeObject *text) -{ - assert(text != NULL); - if (PyUnicode_GET_LENGTH(text) == 0) { - return "too short"; - } - - // XXX Verify that it parses? - - return NULL; -} - -#ifndef NDEBUG -static int -code_has_args(PyCodeObject *code) -{ - assert(code != NULL); - return (code->co_argcount > 0 - || code->co_posonlyargcount > 0 - || code->co_kwonlyargcount > 0 - || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)); -} -#endif - -#define RUN_TEXT 1 -#define RUN_CODE 2 - -static const char * -get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) -{ - const char *codestr = NULL; - Py_ssize_t len = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - - if (PyUnicode_Check(arg)) { - assert(PyUnicode_Check(arg) - && (check_code_str((PyUnicodeObject *)arg) == NULL)); - codestr = PyUnicode_AsUTF8AndSize(arg, &len); - if (codestr == NULL) { - return NULL; - } - if (strlen(codestr) != (size_t)len) { - PyErr_SetString(PyExc_ValueError, - "source code string cannot contain null bytes"); - return NULL; - } - flags = RUN_TEXT; - } - else { - assert(PyCode_Check(arg)); - assert(_PyCode_VerifyStateless( - PyThreadState_Get(), (PyCodeObject *)arg, NULL, NULL, NULL) == 0); - assert(!code_has_args((PyCodeObject *)arg)); - flags = RUN_CODE; - - // Serialize the code object. - bytes_obj = PyMarshal_WriteObjectToString(arg, Py_MARSHAL_VERSION); - if (bytes_obj == NULL) { - return NULL; - } - codestr = PyBytes_AS_STRING(bytes_obj); - len = PyBytes_GET_SIZE(bytes_obj); - } - - *flags_p = flags; - *bytes_p = bytes_obj; - *len_p = len; - return codestr; -} - - /* interpreter-specific code ************************************************/ static int @@ -499,22 +423,14 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) static int -_run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) +_run_script(_PyXIData_t *script, PyObject *ns) { - PyObject *result = NULL; - if (flags & RUN_TEXT) { - result = PyRun_StringFlags(codestr, Py_file_input, ns, ns, NULL); - } - else if (flags & RUN_CODE) { - PyObject *code = PyMarshal_ReadObjectFromString(codestr, codestrlen); - if (code != NULL) { - result = PyEval_EvalCode(code, ns, ns); - Py_DECREF(code); - } - } - else { - Py_UNREACHABLE(); + PyObject *code = _PyXIData_NewObject(script); + if (code == NULL) { + return -1; } + PyObject *result = PyEval_EvalCode(code, ns, ns); + Py_DECREF(code); if (result == NULL) { return -1; } @@ -523,17 +439,16 @@ _run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) } static int -_run_in_interpreter(PyInterpreterState *interp, - const char *codestr, Py_ssize_t codestrlen, - PyObject *shareables, int flags, +_exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, + _PyXIData_t *script, PyObject *shareables, PyObject **p_excinfo) { - assert(!PyErr_Occurred()); + assert(!_PyErr_Occurred(tstate)); _PyXI_session session = {0}; // Prep and switch interpreters. if (_PyXI_Enter(&session, interp, shareables) < 0) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { // If an error occured at this step, it means that interp // was not prepared and switched. return -1; @@ -548,7 +463,7 @@ _run_in_interpreter(PyInterpreterState *interp, } // Run the script. - int res = _run_script(session.main_ns, codestr, codestrlen, flags); + int res = _run_script(script, session.main_ns); // Clean up and switch back. _PyXI_Exit(&session); @@ -935,104 +850,23 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static PyUnicodeObject * -convert_script_arg(PyThreadState *tstate, - PyObject *arg, const char *fname, const char *displayname, - const char *expected) -{ - PyUnicodeObject *str = NULL; - if (PyUnicode_CheckExact(arg)) { - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else if (PyUnicode_Check(arg)) { - // XXX str = PyUnicode_FromObject(arg); - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_str(str); - if (err != NULL) { - Py_DECREF(str); - _PyErr_Format(tstate, PyExc_ValueError, - "%.200s(): bad script text (%s)", fname, err); - return NULL; - } - - return str; -} - -static PyCodeObject * -convert_code_arg(PyThreadState *tstate, - PyObject *arg, const char *fname, const char *displayname, - const char *expected) +static void +unwrap_not_shareable(PyThreadState *tstate) { - PyObject *cause; - PyCodeObject *code = NULL; - if (PyFunction_Check(arg)) { - // For now we allow globals, so we can't use - // _PyFunction_VerifyStateless(). - PyObject *codeobj = PyFunction_GetCode(arg); - if (_PyCode_VerifyStateless( - tstate, (PyCodeObject *)codeobj, NULL, NULL, NULL) < 0) { - goto chained; - } - code = (PyCodeObject *)Py_NewRef(codeobj); + PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); + if (!_PyErr_ExceptionMatches(tstate, exctype)) { + return; } - else if (PyCode_Check(arg)) { - if (_PyCode_VerifyStateless( - tstate, (PyCodeObject *)arg, NULL, NULL, NULL) < 0) { - goto chained; - } - code = (PyCodeObject *)Py_NewRef(arg); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; } else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; + assert(PyException_GetContext(exc) == NULL); } - - return code; - -chained: - cause = _PyErr_GetRaisedException(tstate); - assert(cause != NULL); - _PyArg_BadArgument(fname, displayname, expected, arg); - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyException_SetCause(exc, cause); _PyErr_SetRaisedException(tstate, exc); - return NULL; -} - -static int -_interp_exec(PyObject *self, PyInterpreterState *interp, - PyObject *code_arg, PyObject *shared_arg, PyObject **p_excinfo) -{ - if (shared_arg != NULL && !PyDict_CheckExact(shared_arg)) { - PyErr_SetString(PyExc_TypeError, "expected 'shared' to be a dict"); - return -1; - } - - // Extract code. - Py_ssize_t codestrlen = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - const char *codestr = get_code_str(code_arg, - &codestrlen, &bytes_obj, &flags); - if (codestr == NULL) { - return -1; - } - - // Run the code in the interpreter. - int res = _run_in_interpreter(interp, codestr, codestrlen, - shared_arg, flags, p_excinfo); - Py_XDECREF(bytes_obj); - if (res < 0) { - return -1; - } - - return 0; } static PyObject * @@ -1045,8 +879,9 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" FUNCNAME, kwlist, - &id, &code, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &code, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1058,22 +893,17 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - const char *expected = "a string, a function, or a code object"; - if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(tstate, code, FUNCNAME, - "argument 2", expected); - } - else { - code = (PyObject *)convert_code_arg(tstate, code, FUNCNAME, - "argument 2", expected); - } - if (code == NULL) { + // We don't need the script to be "pure", which means it can use + // global variables. They will be resolved against __main__. + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, shared, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1109,8 +939,9 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O$p:" FUNCNAME, kwlist, - &id, &script, &shared, &restricted)) + "OU|O!$p:" FUNCNAME, kwlist, + &id, &script, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1122,15 +953,20 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - script = (PyObject *)convert_script_arg(tstate, script, FUNCNAME, - "argument 2", "a string"); - if (script == NULL) { + if (PyFunction_Check(script) || PyCode_Check(script)) { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a string", script); + return NULL; + } + + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, script, shared, &excinfo); - Py_DECREF(script); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1156,8 +992,9 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" FUNCNAME, kwlist, - &id, &func, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &func, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1169,16 +1006,29 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyCodeObject *code = convert_code_arg(tstate, func, FUNCNAME, - "argument 2", - "a function or a code object"); - if (code == NULL) { + // We don't worry about checking globals. They will be resolved + // against __main__. + PyObject *code; + if (PyFunction_Check(func)) { + code = PyFunction_GET_CODE(func); + } + else if (PyCode_Check(func)) { + code = func; + } + else { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a function", func); + return NULL; + } + + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, (PyObject *)code, shared, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1231,15 +1081,15 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *code = (PyObject *)convert_code_arg(tstate, callable, FUNCNAME, - "argument 2", "a function"); - if (code == NULL) { + _PyXIData_t xidata = {0}; + if (_PyCode_GetPureScriptXIData(tstate, callable, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, NULL, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, NULL, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; From ec736e7daec33cb3383865895d7ab92d4ada8bc9 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 May 2025 11:15:03 -0400 Subject: [PATCH 297/989] GH-131798: Optimize cached class attributes and methods in the JIT (GH-134403) --- Include/internal/pycore_backoff.h | 6 +- Include/internal/pycore_code.h | 3 + Include/internal/pycore_uop_ids.h | 158 +++++++++--------- Include/internal/pycore_uop_metadata.h | 8 + Lib/test/test_capi/test_opt.py | 47 +++++- ...-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst | 2 + Python/bytecodes.c | 12 ++ Python/executor_cases.c.h | 30 ++++ Python/optimizer_analysis.c | 31 +++- Python/optimizer_bytecodes.c | 46 ++++- Python/optimizer_cases.c.h | 74 +++++++- Python/optimizer_symbols.c | 82 +++++---- 12 files changed, 367 insertions(+), 132 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 942d8b107a7daf..454c8dde031ff4 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -95,8 +95,10 @@ backoff_counter_triggers(_Py_BackoffCounter counter) return counter.value_and_backoff < UNREACHABLE_BACKOFF; } -/* Initial JUMP_BACKWARD counter. - * This determines when we create a trace for a loop. */ +// Initial JUMP_BACKWARD counter. +// Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is +// invalidated we may construct a new trace before the bytecode has properly +// re-specialized: #define JUMP_BACKWARD_INITIAL_VALUE 4095 #define JUMP_BACKWARD_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 37a747aa4e3e46..439989c60f6f24 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -451,6 +451,9 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_BACKOFF 0 // Can't assert this in pycore_backoff.h because of header order dependencies +#if JUMP_BACKWARD_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE +# error "JIT threshold value should be larger than adaptive cooldown value" +#endif #if SIDE_EXIT_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE # error "Cold exit value should be larger than adaptive cooldown value" #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 4c270211d4c323..d6c2ba59db1eda 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -209,129 +209,131 @@ extern "C" { #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_INLINE 446 #define _LOAD_CONST_INLINE_BORROW 447 +#define _LOAD_CONST_UNDER_INLINE 448 +#define _LOAD_CONST_UNDER_INLINE_BORROW 449 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 448 -#define _LOAD_FAST_0 449 -#define _LOAD_FAST_1 450 -#define _LOAD_FAST_2 451 -#define _LOAD_FAST_3 452 -#define _LOAD_FAST_4 453 -#define _LOAD_FAST_5 454 -#define _LOAD_FAST_6 455 -#define _LOAD_FAST_7 456 +#define _LOAD_FAST 450 +#define _LOAD_FAST_0 451 +#define _LOAD_FAST_1 452 +#define _LOAD_FAST_2 453 +#define _LOAD_FAST_3 454 +#define _LOAD_FAST_4 455 +#define _LOAD_FAST_5 456 +#define _LOAD_FAST_6 457 +#define _LOAD_FAST_7 458 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 457 -#define _LOAD_FAST_BORROW_0 458 -#define _LOAD_FAST_BORROW_1 459 -#define _LOAD_FAST_BORROW_2 460 -#define _LOAD_FAST_BORROW_3 461 -#define _LOAD_FAST_BORROW_4 462 -#define _LOAD_FAST_BORROW_5 463 -#define _LOAD_FAST_BORROW_6 464 -#define _LOAD_FAST_BORROW_7 465 +#define _LOAD_FAST_BORROW 459 +#define _LOAD_FAST_BORROW_0 460 +#define _LOAD_FAST_BORROW_1 461 +#define _LOAD_FAST_BORROW_2 462 +#define _LOAD_FAST_BORROW_3 463 +#define _LOAD_FAST_BORROW_4 464 +#define _LOAD_FAST_BORROW_5 465 +#define _LOAD_FAST_BORROW_6 466 +#define _LOAD_FAST_BORROW_7 467 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 466 -#define _LOAD_GLOBAL_BUILTINS 467 -#define _LOAD_GLOBAL_MODULE 468 +#define _LOAD_GLOBAL 468 +#define _LOAD_GLOBAL_BUILTINS 469 +#define _LOAD_GLOBAL_MODULE 470 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 469 -#define _LOAD_SMALL_INT_0 470 -#define _LOAD_SMALL_INT_1 471 -#define _LOAD_SMALL_INT_2 472 -#define _LOAD_SMALL_INT_3 473 -#define _LOAD_SPECIAL 474 +#define _LOAD_SMALL_INT 471 +#define _LOAD_SMALL_INT_0 472 +#define _LOAD_SMALL_INT_1 473 +#define _LOAD_SMALL_INT_2 474 +#define _LOAD_SMALL_INT_3 475 +#define _LOAD_SPECIAL 476 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 475 +#define _MAKE_CALLARGS_A_TUPLE 477 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 476 +#define _MAKE_WARM 478 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 477 -#define _MAYBE_EXPAND_METHOD_KW 478 -#define _MONITOR_CALL 479 -#define _MONITOR_CALL_KW 480 -#define _MONITOR_JUMP_BACKWARD 481 -#define _MONITOR_RESUME 482 +#define _MAYBE_EXPAND_METHOD 479 +#define _MAYBE_EXPAND_METHOD_KW 480 +#define _MONITOR_CALL 481 +#define _MONITOR_CALL_KW 482 +#define _MONITOR_JUMP_BACKWARD 483 +#define _MONITOR_RESUME 484 #define _NOP NOP -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 483 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 485 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 484 -#define _POP_JUMP_IF_TRUE 485 +#define _POP_JUMP_IF_FALSE 486 +#define _POP_JUMP_IF_TRUE 487 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 486 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 -#define _POP_TWO 488 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 489 +#define _POP_TOP_LOAD_CONST_INLINE 488 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 489 +#define _POP_TWO 490 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 491 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 490 +#define _PUSH_FRAME 492 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 491 -#define _PY_FRAME_GENERAL 492 -#define _PY_FRAME_KW 493 -#define _QUICKEN_RESUME 494 -#define _REPLACE_WITH_TRUE 495 +#define _PUSH_NULL_CONDITIONAL 493 +#define _PY_FRAME_GENERAL 494 +#define _PY_FRAME_KW 495 +#define _QUICKEN_RESUME 496 +#define _REPLACE_WITH_TRUE 497 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 496 -#define _SEND 497 -#define _SEND_GEN_FRAME 498 +#define _SAVE_RETURN_OFFSET 498 +#define _SEND 499 +#define _SEND_GEN_FRAME 500 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 499 -#define _STORE_ATTR 500 -#define _STORE_ATTR_INSTANCE_VALUE 501 -#define _STORE_ATTR_SLOT 502 -#define _STORE_ATTR_WITH_HINT 503 +#define _START_EXECUTOR 501 +#define _STORE_ATTR 502 +#define _STORE_ATTR_INSTANCE_VALUE 503 +#define _STORE_ATTR_SLOT 504 +#define _STORE_ATTR_WITH_HINT 505 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 504 -#define _STORE_FAST_0 505 -#define _STORE_FAST_1 506 -#define _STORE_FAST_2 507 -#define _STORE_FAST_3 508 -#define _STORE_FAST_4 509 -#define _STORE_FAST_5 510 -#define _STORE_FAST_6 511 -#define _STORE_FAST_7 512 +#define _STORE_FAST 506 +#define _STORE_FAST_0 507 +#define _STORE_FAST_1 508 +#define _STORE_FAST_2 509 +#define _STORE_FAST_3 510 +#define _STORE_FAST_4 511 +#define _STORE_FAST_5 512 +#define _STORE_FAST_6 513 +#define _STORE_FAST_7 514 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 513 -#define _STORE_SUBSCR 514 -#define _STORE_SUBSCR_DICT 515 -#define _STORE_SUBSCR_LIST_INT 516 +#define _STORE_SLICE 515 +#define _STORE_SUBSCR 516 +#define _STORE_SUBSCR_DICT 517 +#define _STORE_SUBSCR_LIST_INT 518 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 517 -#define _TO_BOOL 518 +#define _TIER2_RESUME_CHECK 519 +#define _TO_BOOL 520 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 519 +#define _TO_BOOL_LIST 521 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 520 +#define _TO_BOOL_STR 522 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 521 -#define _UNPACK_SEQUENCE_LIST 522 -#define _UNPACK_SEQUENCE_TUPLE 523 -#define _UNPACK_SEQUENCE_TWO_TUPLE 524 +#define _UNPACK_SEQUENCE 523 +#define _UNPACK_SEQUENCE_LIST 524 +#define _UNPACK_SEQUENCE_TUPLE 525 +#define _UNPACK_SEQUENCE_TWO_TUPLE 526 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 524 +#define MAX_UOP_ID 526 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fc2c4c2924d9ad..725238228a3dbc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -306,6 +306,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_CONST_UNDER_INLINE] = 0, + [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_START_EXECUTOR] = 0, [_MAKE_WARM] = 0, @@ -504,6 +506,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_CONST] = "_LOAD_CONST", [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", + [_LOAD_CONST_UNDER_INLINE] = "_LOAD_CONST_UNDER_INLINE", + [_LOAD_CONST_UNDER_INLINE_BORROW] = "_LOAD_CONST_UNDER_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", [_LOAD_FAST_0] = "_LOAD_FAST_0", @@ -1196,6 +1200,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; + case _LOAD_CONST_UNDER_INLINE: + return 1; + case _LOAD_CONST_UNDER_INLINE_BORROW: + return 1; case _CHECK_FUNCTION: return 0; case _START_EXECUTOR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b777acb1ec4fd..0bc0e1b212b6b8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1280,8 +1280,8 @@ class Bar: self.assertIsNotNone(ex) self.assertEqual(res, TIER2_THRESHOLD * 6 + 1) call = opnames.index("_CALL_BUILTIN_FAST") - load_attr_top = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", 0, call) - load_attr_bottom = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", call) + load_attr_top = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", 0, call) + load_attr_bottom = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", call) self.assertEqual(opnames[:load_attr_top].count("_GUARD_TYPE_VERSION"), 1) self.assertEqual(opnames[call:load_attr_bottom].count("_CHECK_VALIDITY"), 2) @@ -1303,8 +1303,8 @@ class Foo: self.assertIsNotNone(ex) self.assertEqual(res, TIER2_THRESHOLD * 2) call = opnames.index("_CALL_BUILTIN_FAST_WITH_KEYWORDS") - load_attr_top = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", 0, call) - load_attr_bottom = opnames.index("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", call) + load_attr_top = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", 0, call) + load_attr_bottom = opnames.index("_POP_TOP_LOAD_CONST_INLINE_BORROW", call) self.assertEqual(opnames[:load_attr_top].count("_GUARD_TYPE_VERSION"), 1) self.assertEqual(opnames[call:load_attr_bottom].count("_CHECK_VALIDITY"), 2) @@ -2169,6 +2169,45 @@ def testfunc(n): self.assertNotIn("_LOAD_SMALL_INT", uops) self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) + def test_cached_attributes(self): + class C: + A = 1 + def m(self): + return 1 + class D: + __slots__ = () + A = 1 + def m(self): + return 1 + class E(Exception): + def m(self): + return 1 + def f(n): + x = 0 + c = C() + d = D() + e = E() + for _ in range(n): + x += C.A # _LOAD_ATTR_CLASS + x += c.A # _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES + x += d.A # _LOAD_ATTR_NONDESCRIPTOR_NO_DICT + x += c.m() # _LOAD_ATTR_METHOD_WITH_VALUES + x += d.m() # _LOAD_ATTR_METHOD_NO_DICT + x += e.m() # _LOAD_ATTR_METHOD_LAZY_DICT + return x + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, 6 * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_LOAD_ATTR_CLASS", uops) + self.assertNotIn("_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", uops) + self.assertNotIn("_LOAD_ATTR_NONDESCRIPTOR_NO_DICT", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_WITH_VALUES", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_NO_DICT", uops) + self.assertNotIn("_LOAD_ATTR_METHOD_LAZY_DICT", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst new file mode 100644 index 00000000000000..8eb8782037abfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-23-32-11.gh-issue-131798.G9ZQZw.rst @@ -0,0 +1,2 @@ +Improve the JIT's ability to optimize away cached class attribute and method +loads. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a2367026cde8b8..652bda9c182e49 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5307,6 +5307,18 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } + tier2 op(_LOAD_CONST_UNDER_INLINE, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectNew(ptr); + } + + tier2 op(_LOAD_CONST_UNDER_INLINE_BORROW, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyStackRef_FunctionCheck(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1c8239f38eec91..fcde31a30126a4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7105,6 +7105,36 @@ break; } + case _LOAD_CONST_UNDER_INLINE: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectNew(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND0(); assert(PyStackRef_FunctionCheck(frame->f_funcobj)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 53ab289b75cc9a..5c50228a13b2d1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -375,6 +375,23 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } } +static JitOptSymbol * +lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, + PyTypeObject *type, PyObject *name, uint16_t immortal, + uint16_t mortal) +{ + // The cached value may be dead, so we need to do the lookup again... :( + if (type && PyType_Check(type)) { + PyObject *lookup = _PyType_Lookup(type, name); + if (lookup) { + int opcode = _Py_IsImmortal(lookup) ? immortal : mortal; + REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup); + return sym_new_const(ctx, lookup); + } + } + return sym_new_not_null(ctx); +} + /* _PUSH_FRAME/_RETURN_VALUE's operand can be 0, a PyFunctionObject *, or a * PyCodeObject *. Retrieve the code object if possible. */ @@ -527,6 +544,8 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_COPY] = _NOP, [_LOAD_CONST_INLINE] = _NOP, [_LOAD_CONST_INLINE_BORROW] = _NOP, + [_LOAD_CONST_UNDER_INLINE] = _POP_TOP_LOAD_CONST_INLINE, + [_LOAD_CONST_UNDER_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, [_LOAD_FAST] = _NOP, [_LOAD_FAST_BORROW] = _NOP, [_LOAD_SMALL_INT] = _NOP, @@ -535,10 +554,16 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, }; +const bool op_skip[MAX_UOP_ID + 1] = { + [_NOP] = true, + [_CHECK_VALIDITY] = true, +}; + const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP] = _NOP, [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, + [_POP_TWO] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -578,7 +603,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _NOP + _POP_TOP + _NOP while (op_without_pop[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; - while (last->opcode == _NOP) { + while (op_skip[last->opcode]) { last--; } if (!op_without_push[last->opcode]) { @@ -586,6 +611,10 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } last->opcode = op_without_push[last->opcode]; opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b9ebd8678e0f1e..0b6bbd133d6ac9 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -522,7 +522,7 @@ dummy_func(void) { } op(_LOAD_CONST, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); } @@ -608,7 +608,7 @@ dummy_func(void) { op(_LOAD_ATTR, (owner -- attr, self_or_null[oparg&1])) { (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } } @@ -624,25 +624,59 @@ dummy_func(void) { } op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr)) { - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_NO_DICT, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); } op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_NO_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_LAZY_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0ba45e1f58fe08..5a9fcf3b1b6924 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -68,7 +68,7 @@ case _LOAD_CONST: { JitOptSymbol *value; - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = sym_new_const(ctx, val); stack_pointer[0] = value; @@ -1174,7 +1174,7 @@ self_or_null = &stack_pointer[0]; (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } stack_pointer[-1] = attr; @@ -1284,10 +1284,16 @@ } case _LOAD_ATTR_CLASS: { + JitOptSymbol *owner; JitOptSymbol *attr; + owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } @@ -1701,7 +1707,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1717,7 +1727,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1727,15 +1741,31 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + JitOptSymbol *owner; JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + JitOptSymbol *owner; JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } @@ -1751,7 +1781,11 @@ owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -2594,6 +2628,30 @@ break; } + case _LOAD_CONST_UNDER_INLINE: { + JitOptSymbol *value; + JitOptSymbol *new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + JitOptSymbol *value; + JitOptSymbol *new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2e619fa6f9977f..25de5d83166f64 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -13,22 +13,46 @@ #include #include -/* Symbols - ======= - - See the diagram at - https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md - - We represent the nodes in the diagram as follows - (the flag bits are only defined in optimizer_symbols.c): - - Top: no flag bits, typ and const_val are NULL. - - NULL: IS_NULL flag set, type and const_val NULL. - - Not NULL: NOT_NULL flag set, type and const_val NULL. - - None/not None: not used. (None could be represented as any other constant.) - - Known type: NOT_NULL flag set and typ set; const_val is NULL. - - Known constant: NOT_NULL flag set, type set, const_val set. - - Bottom: IS_NULL and NOT_NULL flags set, type and const_val NULL. - */ +/* + +Symbols +======= + +https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md + +Logically, all symbols begin as UNKNOWN, and can transition downwards along the +edges of the lattice, but *never* upwards (see the diagram below). The UNKNOWN +state represents no information, and the BOTTOM state represents contradictory +information. Though symbols logically progress through all intermediate nodes, +we often skip in-between states for convenience: + + UNKNOWN + | | +NULL | +| | <- Anything below this level is an object. +| NON_NULL +| | | <- Anything below this level has a known type version. +| TYPE_VERSION | +| | | <- Anything below this level has a known type. +| KNOWN_CLASS | +| | | | <- Anything below this level has a known truthiness. +| | | TRUTHINESS +| | | | +| TUPLE | | +| | | | <- Anything below this level is a known constant. +| KNOWN_VALUE +| | <- Anything below this level is unreachable. +BOTTOM + +For example, after guarding that the type of an UNKNOWN local is int, we can +narrow the symbol to KNOWN_CLASS (logically progressing though NON_NULL and +TYPE_VERSION to get there). Later, we may learn that it is falsey based on the +result of a truth test, which would allow us to narrow the symbol to KNOWN_VALUE +(with a value of integer zero). If at any point we encounter a float guard on +the same symbol, that would be a contradiction, and the symbol would be set to +BOTTOM (indicating that the code is unreachable). + +*/ #ifdef Py_DEBUG static inline int get_lltrace(void) { @@ -420,7 +444,6 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_BOTTOM_TAG: case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: @@ -429,6 +452,8 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) return sym->cls.type; case JIT_SYM_KNOWN_VALUE_TAG: return Py_TYPE(sym->value.value); + case JIT_SYM_TYPE_VERSION_TAG: + return _PyType_LookupByVersion(sym->version.version); case JIT_SYM_TUPLE_TAG: return &PyTuple_Type; case JIT_SYM_TRUTHINESS_TAG: @@ -464,21 +489,7 @@ _Py_uop_sym_get_type_version(JitOptSymbol *sym) bool _Py_uop_sym_has_type(JitOptSymbol *sym) { - JitSymType tag = sym->tag; - switch(tag) { - case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: - case JIT_SYM_BOTTOM_TAG: - case JIT_SYM_NON_NULL_TAG: - case JIT_SYM_UNKNOWN_TAG: - return false; - case JIT_SYM_KNOWN_CLASS_TAG: - case JIT_SYM_KNOWN_VALUE_TAG: - case JIT_SYM_TUPLE_TAG: - case JIT_SYM_TRUTHINESS_TAG: - return true; - } - Py_UNREACHABLE(); + return _Py_uop_sym_get_type(sym) != NULL; } bool @@ -576,7 +587,7 @@ _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) else if (sym->tag == JIT_SYM_TUPLE_TAG && item < sym->tuple.length) { return allocation_base(ctx) + sym->tuple.items[item]; } - return _Py_uop_sym_new_unknown(ctx); + return _Py_uop_sym_new_not_null(ctx); } int @@ -863,6 +874,11 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, "tuple item does not match value used to create tuple" ); + sym = _Py_uop_sym_new_type(ctx, &PyTuple_Type); + TEST_PREDICATE( + _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, sym, 42)), + "Unknown tuple item is not narrowed to non-NULL" + ); JitOptSymbol *value = _Py_uop_sym_new_type(ctx, &PyBool_Type); sym = _Py_uop_sym_new_truthiness(ctx, value, false); TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean"); From 4a4ac3ab4d2a34af99af9e948be9cd1257ed4186 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 May 2025 10:14:04 -0600 Subject: [PATCH 298/989] gh-132775: Make _PyXI_session Opaque (gh-134452) This is mostly a refactor to clean things up a bit, most notably the "XI namespace" code. Making the session opaque requires adding the following internal-only functions: * _PyXI_NewSession() * _PyXI_FreeSession() * _PyXI_GetMainNamespace() --- Include/internal/pycore_crossinterp.h | 58 +-- Modules/_interpretersmodule.c | 41 +- Python/crossinterp.c | 723 +++++++++++++++----------- 3 files changed, 456 insertions(+), 366 deletions(-) diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 45fa47d62c78a3..1272927413868b 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -335,24 +335,9 @@ typedef struct _sharedexception { PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); -typedef struct xi_session _PyXI_session; -typedef struct _sharedns _PyXI_namespace; - -PyAPI_FUNC(void) _PyXI_FreeNamespace(_PyXI_namespace *ns); -PyAPI_FUNC(_PyXI_namespace *) _PyXI_NamespaceFromNames(PyObject *names); -PyAPI_FUNC(int) _PyXI_FillNamespaceFromDict( - _PyXI_namespace *ns, - PyObject *nsobj, - _PyXI_session *session); -PyAPI_FUNC(int) _PyXI_ApplyNamespace( - _PyXI_namespace *ns, - PyObject *nsobj, - PyObject *dflt); - - // A cross-interpreter session involves entering an interpreter -// (_PyXI_Enter()), doing some work with it, and finally exiting -// that interpreter (_PyXI_Exit()). +// with _PyXI_Enter(), doing some work with it, and finally exiting +// that interpreter with _PyXI_Exit(). // // At the boundaries of the session, both entering and exiting, // data may be exchanged between the previous interpreter and the @@ -360,39 +345,10 @@ PyAPI_FUNC(int) _PyXI_ApplyNamespace( // isolation between interpreters. This includes setting objects // in the target's __main__ module on the way in, and capturing // uncaught exceptions on the way out. -struct xi_session { - // Once a session has been entered, this is the tstate that was - // current before the session. If it is different from cur_tstate - // then we must have switched interpreters. Either way, this will - // be the current tstate once we exit the session. - PyThreadState *prev_tstate; - // Once a session has been entered, this is the current tstate. - // It must be current when the session exits. - PyThreadState *init_tstate; - // This is true if init_tstate needs cleanup during exit. - int own_init_tstate; - - // This is true if, while entering the session, init_thread took - // "ownership" of the interpreter's __main__ module. This means - // it is the only thread that is allowed to run code there. - // (Caveat: for now, users may still run exec() against the - // __main__ module's dict, though that isn't advisable.) - int running; - // This is a cached reference to the __dict__ of the entered - // interpreter's __main__ module. It is looked up when at the - // beginning of the session as a convenience. - PyObject *main_ns; - - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *error_override; - // This is set if exit captured an exception to propagate. - _PyXI_error *error; - - // -- pre-allocated memory -- - _PyXI_error _error; - _PyXI_errcode _error_override; -}; +typedef struct xi_session _PyXI_session; + +PyAPI_FUNC(_PyXI_session *) _PyXI_NewSession(void); +PyAPI_FUNC(void) _PyXI_FreeSession(_PyXI_session *); PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session *session, @@ -400,6 +356,8 @@ PyAPI_FUNC(int) _PyXI_Enter( PyObject *nsupdates); PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session); +PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace(_PyXI_session *); + PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session); PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session); diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 7f84b38a70e127..376517ab92360f 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -444,42 +444,54 @@ _exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, PyObject **p_excinfo) { assert(!_PyErr_Occurred(tstate)); - _PyXI_session session = {0}; + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + return -1; + } // Prep and switch interpreters. - if (_PyXI_Enter(&session, interp, shareables) < 0) { + if (_PyXI_Enter(session, interp, shareables) < 0) { if (_PyErr_Occurred(tstate)) { // If an error occured at this step, it means that interp // was not prepared and switched. + _PyXI_FreeSession(session); return -1; } // Now, apply the error from another interpreter: - PyObject *excinfo = _PyXI_ApplyError(session.error); + PyObject *excinfo = _PyXI_ApplyCapturedException(session); if (excinfo != NULL) { *p_excinfo = excinfo; } assert(PyErr_Occurred()); + _PyXI_FreeSession(session); return -1; } // Run the script. - int res = _run_script(script, session.main_ns); + int res = -1; + PyObject *mainns = _PyXI_GetMainNamespace(session); + if (mainns == NULL) { + goto finally; + } + res = _run_script(script, mainns); +finally: // Clean up and switch back. - _PyXI_Exit(&session); + _PyXI_Exit(session); // Propagate any exception out to the caller. assert(!PyErr_Occurred()); if (res < 0) { - PyObject *excinfo = _PyXI_ApplyCapturedException(&session); + PyObject *excinfo = _PyXI_ApplyCapturedException(session); if (excinfo != NULL) { *p_excinfo = excinfo; } } else { - assert(!_PyXI_HasCapturedException(&session)); + assert(!_PyXI_HasCapturedException(session)); } + _PyXI_FreeSession(session); return res; } @@ -824,22 +836,27 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } } - _PyXI_session session = {0}; + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + return NULL; + } // Prep and switch interpreters, including apply the updates. - if (_PyXI_Enter(&session, interp, updates) < 0) { + if (_PyXI_Enter(session, interp, updates) < 0) { if (!PyErr_Occurred()) { - _PyXI_ApplyCapturedException(&session); + _PyXI_ApplyCapturedException(session); assert(PyErr_Occurred()); } else { - assert(!_PyXI_HasCapturedException(&session)); + assert(!_PyXI_HasCapturedException(session)); } + _PyXI_FreeSession(session); return NULL; } // Clean up and switch back. - _PyXI_Exit(&session); + _PyXI_Exit(session); + _PyXI_FreeSession(session); Py_RETURN_NONE; } diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 6681b969183925..65ccab32daf730 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -1914,156 +1914,212 @@ _sharednsitem_apply(_PyXI_namespace_item *item, PyObject *ns, PyObject *dflt) return res; } -struct _sharedns { - Py_ssize_t len; - _PyXI_namespace_item *items; -}; -static _PyXI_namespace * -_sharedns_new(void) -{ - _PyXI_namespace *ns = PyMem_RawCalloc(sizeof(_PyXI_namespace), 1); - if (ns == NULL) { - PyErr_NoMemory(); - return NULL; - } - *ns = (_PyXI_namespace){ 0 }; - return ns; -} +typedef struct { + Py_ssize_t maxitems; + Py_ssize_t numnames; + Py_ssize_t numvalues; + _PyXI_namespace_item items[1]; +} _PyXI_namespace; +#ifndef NDEBUG static int -_sharedns_is_initialized(_PyXI_namespace *ns) +_sharedns_check_counts(_PyXI_namespace *ns) { - if (ns->len == 0) { - assert(ns->items == NULL); + if (ns->maxitems <= 0) { + return 0; + } + if (ns->numnames < 0) { + return 0; + } + if (ns->numnames > ns->maxitems) { + return 0; + } + if (ns->numvalues < 0) { + return 0; + } + if (ns->numvalues > ns->numnames) { return 0; } - - assert(ns->len > 0); - assert(ns->items != NULL); - assert(_sharednsitem_is_initialized(&ns->items[0])); - assert(ns->len == 1 - || _sharednsitem_is_initialized(&ns->items[ns->len - 1])); return 1; } -#define HAS_COMPLETE_DATA 1 -#define HAS_PARTIAL_DATA 2 - static int -_sharedns_has_xidata(_PyXI_namespace *ns, int64_t *p_interpid) +_sharedns_check_consistency(_PyXI_namespace *ns) { - // We expect _PyXI_namespace to always be initialized. - assert(_sharedns_is_initialized(ns)); - int res = 0; - _PyXI_namespace_item *item0 = &ns->items[0]; - if (!_sharednsitem_is_initialized(item0)) { + if (!_sharedns_check_counts(ns)) { return 0; } - int64_t interpid0 = -1; - if (!_sharednsitem_has_value(item0, &interpid0)) { - return 0; + + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + item = &ns->items[0]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid0 = -1; + if (!_sharednsitem_has_value(item, &interpid0)) { + return 0; + } + i += 1; + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid = -1; + if (!_sharednsitem_has_value(item, &interpid)) { + return 0; + } + if (interpid != interpid0) { + return 0; + } + } } - if (ns->len > 1) { - // At this point we know it is has at least partial data. - _PyXI_namespace_item *itemN = &ns->items[ns->len-1]; - if (!_sharednsitem_is_initialized(itemN)) { - res = HAS_PARTIAL_DATA; - goto finally; + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; } - int64_t interpidN = -1; - if (!_sharednsitem_has_value(itemN, &interpidN)) { - res = HAS_PARTIAL_DATA; - goto finally; + if (_sharednsitem_has_value(item, NULL)) { + return 0; } - assert(interpidN == interpid0); } - res = HAS_COMPLETE_DATA; - *p_interpid = interpid0; - -finally: - return res; + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + if (_sharednsitem_is_initialized(item)) { + return 0; + } + if (_sharednsitem_has_value(item, NULL)) { + return 0; + } + } + return 1; } +#endif -static void -_sharedns_clear(_PyXI_namespace *ns) +static _PyXI_namespace * +_sharedns_alloc(Py_ssize_t maxitems) { - if (!_sharedns_is_initialized(ns)) { - return; + if (maxitems < 0) { + if (!PyErr_Occurred()) { + PyErr_BadInternalCall(); + } + return NULL; + } + else if (maxitems == 0) { + PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); + return NULL; } - // If the cross-interpreter data were allocated as part of - // _PyXI_namespace_item (instead of dynamically), this is where - // we would need verify that we are clearing the items in the - // correct interpreter, to avoid a race with releasing the XI data - // via a pending call. See _sharedns_has_xidata(). - for (Py_ssize_t i=0; i < ns->len; i++) { - _sharednsitem_clear(&ns->items[i]); + // Check for overflow. + size_t fixedsize = sizeof(_PyXI_namespace) - sizeof(_PyXI_namespace_item); + if ((size_t)maxitems > + ((size_t)PY_SSIZE_T_MAX - fixedsize) / sizeof(_PyXI_namespace_item)) + { + PyErr_NoMemory(); + return NULL; + } + + // Allocate the value, including items. + size_t size = fixedsize + sizeof(_PyXI_namespace_item) * maxitems; + + _PyXI_namespace *ns = PyMem_RawCalloc(size, 1); + if (ns == NULL) { + PyErr_NoMemory(); + return NULL; } - PyMem_RawFree(ns->items); - ns->items = NULL; - ns->len = 0; + ns->maxitems = maxitems; + assert(_sharedns_check_consistency(ns)); + return ns; } static void _sharedns_free(_PyXI_namespace *ns) { - _sharedns_clear(ns); + // If we weren't always dynamically allocating the cross-interpreter + // data in each item then we would need to use a pending call + // to call _sharedns_free(), to avoid the race between freeing + // the shared namespace and releasing the XI data. + assert(_sharedns_check_counts(ns)); + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + // One or more items may have interpreter-specific data. +#ifndef NDEBUG + int64_t interpid = PyInterpreterState_GetID(PyInterpreterState_Get()); + int64_t interpid_i; +#endif + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + // While we do want to ensure consistency across items, + // technically they don't need to match the current + // interpreter. However, we keep the constraint for + // simplicity, by giving _PyXI_FreeNamespace() the exclusive + // responsibility of dealing with the owning interpreter. + assert(_sharednsitem_has_value(item, &interpid_i)); + assert(interpid_i == interpid); + _sharednsitem_clear(item); + } + } + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + _sharednsitem_clear(item); + } +#ifndef NDEBUG + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + assert(!_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + } +#endif + PyMem_RawFree(ns); } -static int -_sharedns_init(_PyXI_namespace *ns, PyObject *names) +static _PyXI_namespace * +_create_sharedns(PyObject *names) { - assert(!_sharedns_is_initialized(ns)); assert(names != NULL); - Py_ssize_t len = PyDict_CheckExact(names) + Py_ssize_t numnames = PyDict_CheckExact(names) ? PyDict_Size(names) : PySequence_Size(names); - if (len < 0) { - return -1; - } - if (len == 0) { - PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); - return -1; - } - assert(len > 0); - // Allocate the items. - _PyXI_namespace_item *items = - PyMem_RawCalloc(sizeof(struct _sharednsitem), len); - if (items == NULL) { - PyErr_NoMemory(); - return -1; + _PyXI_namespace *ns = _sharedns_alloc(numnames); + if (ns == NULL) { + return NULL; } + _PyXI_namespace_item *items = ns->items; // Fill in the names. - Py_ssize_t i = -1; if (PyDict_CheckExact(names)) { + Py_ssize_t i = 0; Py_ssize_t pos = 0; - for (i=0; i < len; i++) { - PyObject *key; - if (!PyDict_Next(names, &pos, &key, NULL)) { - // This should not be possible. - assert(0); - goto error; - } - if (_sharednsitem_init(&items[i], key) < 0) { + PyObject *name; + while(PyDict_Next(names, &pos, &name, NULL)) { + if (_sharednsitem_init(&items[i], name) < 0) { goto error; } + ns->numnames += 1; + i += 1; } } else if (PySequence_Check(names)) { - for (i=0; i < len; i++) { - PyObject *key = PySequence_GetItem(names, i); - if (key == NULL) { + for (Py_ssize_t i = 0; i < numnames; i++) { + PyObject *name = PySequence_GetItem(names, i); + if (name == NULL) { goto error; } - int res = _sharednsitem_init(&items[i], key); - Py_DECREF(key); + int res = _sharednsitem_init(&items[i], name); + Py_DECREF(name); if (res < 0) { goto error; } + ns->numnames += 1; } } else { @@ -2071,140 +2127,79 @@ _sharedns_init(_PyXI_namespace *ns, PyObject *names) "non-sequence namespace not supported"); goto error; } - - ns->items = items; - ns->len = len; - assert(_sharedns_is_initialized(ns)); - return 0; + assert(ns->numnames == ns->maxitems); + return ns; error: - for (Py_ssize_t j=0; j < i; j++) { - _sharednsitem_clear(&items[j]); - } - PyMem_RawFree(items); - assert(!_sharedns_is_initialized(ns)); - return -1; -} - -void -_PyXI_FreeNamespace(_PyXI_namespace *ns) -{ - if (!_sharedns_is_initialized(ns)) { - return; - } - - int64_t interpid = -1; - if (!_sharedns_has_xidata(ns, &interpid)) { - _sharedns_free(ns); - return; - } - - if (interpid == PyInterpreterState_GetID(PyInterpreterState_Get())) { - _sharedns_free(ns); - } - else { - // If we weren't always dynamically allocating the cross-interpreter - // data in each item then we would need to using a pending call - // to call _sharedns_free(), to avoid the race between freeing - // the shared namespace and releasing the XI data. - _sharedns_free(ns); - } -} - -_PyXI_namespace * -_PyXI_NamespaceFromNames(PyObject *names) -{ - if (names == NULL || names == Py_None) { - return NULL; - } - - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; - } - - if (_sharedns_init(ns, names) < 0) { - PyMem_RawFree(ns); - if (PySequence_Size(names) == 0) { - PyErr_Clear(); - } - return NULL; - } - - return ns; + _sharedns_free(ns); + return NULL; } -#ifndef NDEBUG -static int _session_is_active(_PyXI_session *); -#endif static void _propagate_not_shareable_error(_PyXI_session *); -int -_PyXI_FillNamespaceFromDict(_PyXI_namespace *ns, PyObject *nsobj, - _PyXI_session *session) -{ - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - assert(_sharedns_is_initialized(ns)); - for (Py_ssize_t i=0; i < ns->len; i++) { - _PyXI_namespace_item *item = &ns->items[i]; - if (_sharednsitem_copy_from_ns(item, nsobj) < 0) { +static int +_fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, _PyXI_session *session) +{ + // All items are expected to be shareable. + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + assert(ns->numvalues == 0); + for (Py_ssize_t i=0; i < ns->maxitems; i++) { + if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj) < 0) { _propagate_not_shareable_error(session); // Clear out the ones we set so far. for (Py_ssize_t j=0; j < i; j++) { _sharednsitem_clear_value(&ns->items[j]); + ns->numvalues -= 1; } return -1; } + ns->numvalues += 1; } return 0; } -// All items are expected to be shareable. -static _PyXI_namespace * -_PyXI_NamespaceFromDict(PyObject *nsobj, _PyXI_session *session) +static int +_sharedns_free_pending(void *data) { - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - if (nsobj == NULL || nsobj == Py_None) { - return NULL; - } - if (!PyDict_CheckExact(nsobj)) { - PyErr_SetString(PyExc_TypeError, "expected a dict"); - return NULL; - } + _sharedns_free((_PyXI_namespace *)data); + return 0; +} - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; +static void +_destroy_sharedns(_PyXI_namespace *ns) +{ + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + if (ns->numvalues == 0) { + _sharedns_free(ns); + return; } - if (_sharedns_init(ns, nsobj) < 0) { - if (PyDict_Size(nsobj) == 0) { - PyMem_RawFree(ns); - PyErr_Clear(); - return NULL; - } - goto error; + int64_t interpid0; + if (!_sharednsitem_has_value(&ns->items[0], &interpid0)) { + // This shouldn't have been possible. + // We can deal with it in _sharedns_free(). + _sharedns_free(ns); + return; } - - if (_PyXI_FillNamespaceFromDict(ns, nsobj, session) < 0) { - goto error; + PyInterpreterState *interp = _PyInterpreterState_LookUpID(interpid0); + if (interp == PyInterpreterState_Get()) { + _sharedns_free(ns); + return; } - return ns; - -error: - assert(PyErr_Occurred() - || (session != NULL && session->error_override != NULL)); - _sharedns_free(ns); - return NULL; + // One or more items may have interpreter-specific data. + // Currently the xidata for each value is dynamically allocated, + // so technically we don't need to worry about that. + // However, explicitly adding a pending call here is simpler. + (void)_Py_CallInInterpreter(interp, _sharedns_free_pending, ns); } -int -_PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) +static int +_apply_sharedns(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) { - for (Py_ssize_t i=0; i < ns->len; i++) { + for (Py_ssize_t i=0; i < ns->maxitems; i++) { if (_sharednsitem_apply(&ns->items[i], nsobj, dflt) != 0) { return -1; } @@ -2213,9 +2208,79 @@ _PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) } -/**********************/ -/* high-level helpers */ -/**********************/ +/*********************************/ +/* switched-interpreter sessions */ +/*********************************/ + +struct xi_session { +#define SESSION_UNUSED 0 +#define SESSION_ACTIVE 1 + int status; + int switched; + + // Once a session has been entered, this is the tstate that was + // current before the session. If it is different from cur_tstate + // then we must have switched interpreters. Either way, this will + // be the current tstate once we exit the session. + PyThreadState *prev_tstate; + // Once a session has been entered, this is the current tstate. + // It must be current when the session exits. + PyThreadState *init_tstate; + // This is true if init_tstate needs cleanup during exit. + int own_init_tstate; + + // This is true if, while entering the session, init_thread took + // "ownership" of the interpreter's __main__ module. This means + // it is the only thread that is allowed to run code there. + // (Caveat: for now, users may still run exec() against the + // __main__ module's dict, though that isn't advisable.) + int running; + // This is a cached reference to the __dict__ of the entered + // interpreter's __main__ module. It is looked up when at the + // beginning of the session as a convenience. + PyObject *main_ns; + + // This is set if the interpreter is entered and raised an exception + // that needs to be handled in some special way during exit. + _PyXI_errcode *error_override; + // This is set if exit captured an exception to propagate. + _PyXI_error *error; + + // -- pre-allocated memory -- + _PyXI_error _error; + _PyXI_errcode _error_override; +}; + + +_PyXI_session * +_PyXI_NewSession(void) +{ + _PyXI_session *session = PyMem_RawCalloc(1, sizeof(_PyXI_session)); + if (session == NULL) { + PyErr_NoMemory(); + return NULL; + } + return session; +} + +void +_PyXI_FreeSession(_PyXI_session *session) +{ + assert(session->status == SESSION_UNUSED); + PyMem_RawFree(session); +} + + +static inline int +_session_is_active(_PyXI_session *session) +{ + return session->status == SESSION_ACTIVE; +} + +static int _ensure_main_ns(_PyXI_session *); +static inline void _session_set_error(_PyXI_session *, _PyXI_errcode); +static void _capture_current_exception(_PyXI_session *); + /* enter/exit a cross-interpreter session */ @@ -2223,6 +2288,7 @@ static void _enter_session(_PyXI_session *session, PyInterpreterState *interp) { // Set here and cleared in _exit_session(). + assert(session->status == SESSION_UNUSED); assert(!session->own_init_tstate); assert(session->init_tstate == NULL); assert(session->prev_tstate == NULL); @@ -2237,15 +2303,22 @@ _enter_session(_PyXI_session *session, PyInterpreterState *interp) // Switch to interpreter. PyThreadState *tstate = PyThreadState_Get(); PyThreadState *prev = tstate; - if (interp != tstate->interp) { + int same_interp = (interp == tstate->interp); + if (!same_interp) { tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_EXEC); // XXX Possible GILState issues? - session->prev_tstate = PyThreadState_Swap(tstate); - assert(session->prev_tstate == prev); - session->own_init_tstate = 1; + PyThreadState *swapped = PyThreadState_Swap(tstate); + assert(swapped == prev); + (void)swapped; } - session->init_tstate = tstate; - session->prev_tstate = prev; + + *session = (_PyXI_session){ + .status = SESSION_ACTIVE, + .switched = !same_interp, + .init_tstate = tstate, + .prev_tstate = prev, + .own_init_tstate = !same_interp, + }; } static void @@ -2256,9 +2329,7 @@ _exit_session(_PyXI_session *session) assert(PyThreadState_Get() == tstate); // Release any of the entered interpreters resources. - if (session->main_ns != NULL) { - Py_CLEAR(session->main_ns); - } + Py_CLEAR(session->main_ns); // Ensure this thread no longer owns __main__. if (session->running) { @@ -2279,17 +2350,15 @@ _exit_session(_PyXI_session *session) else { assert(!session->own_init_tstate); } - session->prev_tstate = NULL; - session->init_tstate = NULL; -} -#ifndef NDEBUG -static int -_session_is_active(_PyXI_session *session) -{ - return (session->init_tstate != NULL); + // For now the error data persists past the exit. + *session = (_PyXI_session){ + .error_override = session->error_override, + .error = session->error, + ._error = session->_error, + ._error_override = session->_error_override, + }; } -#endif static void _propagate_not_shareable_error(_PyXI_session *session) @@ -2306,11 +2375,102 @@ _propagate_not_shareable_error(_PyXI_session *session) } if (PyErr_ExceptionMatches(exctype)) { // We want to propagate the exception directly. - session->_error_override = _PyXI_ERR_NOT_SHAREABLE; - session->error_override = &session->_error_override; + _session_set_error(session, _PyXI_ERR_NOT_SHAREABLE); } } +PyObject * +_PyXI_ApplyCapturedException(_PyXI_session *session) +{ + assert(!PyErr_Occurred()); + assert(session->error != NULL); + PyObject *res = _PyXI_ApplyError(session->error); + assert((res == NULL) != (PyErr_Occurred() == NULL)); + session->error = NULL; + return res; +} + +int +_PyXI_HasCapturedException(_PyXI_session *session) +{ + return session->error != NULL; +} + +int +_PyXI_Enter(_PyXI_session *session, + PyInterpreterState *interp, PyObject *nsupdates) +{ + // Convert the attrs for cross-interpreter use. + _PyXI_namespace *sharedns = NULL; + if (nsupdates != NULL) { + Py_ssize_t len = PyDict_Size(nsupdates); + if (len < 0) { + return -1; + } + if (len > 0) { + sharedns = _create_sharedns(nsupdates); + if (sharedns == NULL) { + return -1; + } + if (_fill_sharedns(sharedns, nsupdates, NULL) < 0) { + assert(session->error == NULL); + _destroy_sharedns(sharedns); + return -1; + } + } + } + + // Switch to the requested interpreter (if necessary). + _enter_session(session, interp); + _PyXI_errcode errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + + // Ensure this thread owns __main__. + if (_PyInterpreterState_SetRunningMain(interp) < 0) { + // In the case where we didn't switch interpreters, it would + // be more efficient to leave the exception in place and return + // immediately. However, life is simpler if we don't. + errcode = _PyXI_ERR_ALREADY_RUNNING; + goto error; + } + session->running = 1; + + // Apply the cross-interpreter data. + if (sharedns != NULL) { + if (_ensure_main_ns(session) < 0) { + errcode = _PyXI_ERR_MAIN_NS_FAILURE; + goto error; + } + if (_apply_sharedns(sharedns, session->main_ns, NULL) < 0) { + errcode = _PyXI_ERR_APPLY_NS_FAILURE; + goto error; + } + _destroy_sharedns(sharedns); + } + + errcode = _PyXI_ERR_NO_ERROR; + assert(!PyErr_Occurred()); + return 0; + +error: + // We want to propagate all exceptions here directly (best effort). + _session_set_error(session, errcode); + _exit_session(session); + if (sharedns != NULL) { + _destroy_sharedns(sharedns); + } + return -1; +} + +void +_PyXI_Exit(_PyXI_session *session) +{ + _capture_current_exception(session); + _exit_session(session); +} + + +/* in an active cross-interpreter session */ + static void _capture_current_exception(_PyXI_session *session) { @@ -2372,100 +2532,55 @@ _capture_current_exception(_PyXI_session *session) // Finished! assert(!PyErr_Occurred()); - session->error = err; -} - -PyObject * -_PyXI_ApplyCapturedException(_PyXI_session *session) -{ - assert(!PyErr_Occurred()); - assert(session->error != NULL); - PyObject *res = _PyXI_ApplyError(session->error); - assert((res == NULL) != (PyErr_Occurred() == NULL)); - session->error = NULL; - return res; + session->error = err; } -int -_PyXI_HasCapturedException(_PyXI_session *session) +static inline void +_session_set_error(_PyXI_session *session, _PyXI_errcode errcode) { - return session->error != NULL; + assert(_session_is_active(session)); + assert(PyErr_Occurred()); + if (errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION) { + session->_error_override = errcode; + session->error_override = &session->_error_override; + } + _capture_current_exception(session); } -int -_PyXI_Enter(_PyXI_session *session, - PyInterpreterState *interp, PyObject *nsupdates) +static int +_ensure_main_ns(_PyXI_session *session) { - // Convert the attrs for cross-interpreter use. - _PyXI_namespace *sharedns = NULL; - if (nsupdates != NULL) { - sharedns = _PyXI_NamespaceFromDict(nsupdates, NULL); - if (sharedns == NULL && PyErr_Occurred()) { - assert(session->error == NULL); - return -1; - } - } - - // Switch to the requested interpreter (if necessary). - _enter_session(session, interp); - PyThreadState *session_tstate = session->init_tstate; - _PyXI_errcode errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; - - // Ensure this thread owns __main__. - if (_PyInterpreterState_SetRunningMain(interp) < 0) { - // In the case where we didn't switch interpreters, it would - // be more efficient to leave the exception in place and return - // immediately. However, life is simpler if we don't. - errcode = _PyXI_ERR_ALREADY_RUNNING; - goto error; + assert(_session_is_active(session)); + if (session->main_ns != NULL) { + return 0; } - session->running = 1; - // Cache __main__.__dict__. - PyObject *main_mod = _Py_GetMainModule(session_tstate); + PyObject *main_mod = _Py_GetMainModule(session->init_tstate); if (_Py_CheckMainModule(main_mod) < 0) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + return -1; } PyObject *ns = PyModule_GetDict(main_mod); // borrowed Py_DECREF(main_mod); if (ns == NULL) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + return -1; } session->main_ns = Py_NewRef(ns); - - // Apply the cross-interpreter data. - if (sharedns != NULL) { - if (_PyXI_ApplyNamespace(sharedns, ns, NULL) < 0) { - errcode = _PyXI_ERR_APPLY_NS_FAILURE; - goto error; - } - _PyXI_FreeNamespace(sharedns); - } - - errcode = _PyXI_ERR_NO_ERROR; - assert(!PyErr_Occurred()); return 0; - -error: - assert(PyErr_Occurred()); - // We want to propagate all exceptions here directly (best effort). - assert(errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION); - session->error_override = &errcode; - _capture_current_exception(session); - _exit_session(session); - if (sharedns != NULL) { - _PyXI_FreeNamespace(sharedns); - } - return -1; } -void -_PyXI_Exit(_PyXI_session *session) +PyObject * +_PyXI_GetMainNamespace(_PyXI_session *session) { - _capture_current_exception(session); - _exit_session(session); + if (!_session_is_active(session)) { + PyErr_SetString(PyExc_RuntimeError, "session not active"); + return NULL; + } + if (_ensure_main_ns(session) < 0) { + _session_set_error(session, _PyXI_ERR_MAIN_NS_FAILURE); + _capture_current_exception(session); + return NULL; + } + return session->main_ns; } From fade146cfb1616ad7b3b918bedb86756dedf79e6 Mon Sep 17 00:00:00 2001 From: Duprat Date: Thu, 22 May 2025 18:46:57 +0200 Subject: [PATCH 299/989] gh-134322: Fix `repr(threading.RLock)` (#134389) Fix the `__repr__` value of `threading.RLock` from `_thread` module, when just created. --- Lib/test/lock_tests.py | 20 ++++++++++++++++++++ Lib/test/test_importlib/test_locks.py | 1 + Modules/_threadmodule.c | 8 +++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py index 850450c1e81a16..691029a1a54f7f 100644 --- a/Lib/test/lock_tests.py +++ b/Lib/test/lock_tests.py @@ -337,6 +337,26 @@ class RLockTests(BaseLockTests): """ Tests for recursive locks. """ + def test_repr_count(self): + # see gh-134322: check that count values are correct: + # when a rlock is just created, + # in a second thread when rlock is acquired in the main thread. + lock = self.locktype() + self.assertIn("count=0", repr(lock)) + self.assertIn("lock.thread; int locked = rlock_locked_impl(self); - size_t count = self->lock.level + 1; + size_t count; + if (locked) { + count = self->lock.level + 1; + } + else { + count = 0; + } return PyUnicode_FromFormat( "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", locked ? "locked" : "unlocked", From 484e00379bd1f2e9b2e273af3e6d524cd07b0c6c Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Thu, 22 May 2025 12:52:47 -0400 Subject: [PATCH 300/989] GH-131798: Optimize away isinstance calls in the JIT (GH-134369) --- Include/internal/pycore_uop_ids.h | 91 ++++++++------- Include/internal/pycore_uop_metadata.h | 26 ++++- Lib/test/test_capi/test_opt.py | 20 +++- ...-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst | 1 + Python/bytecodes.c | 42 ++++++- Python/executor_cases.c.h | 110 ++++++++++++++++++ Python/optimizer_analysis.c | 37 ++++-- Python/optimizer_bytecodes.c | 8 ++ Python/optimizer_cases.c.h | 38 ++++++ 9 files changed, 312 insertions(+), 61 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d6c2ba59db1eda..d08799487fda42 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -265,75 +265,80 @@ extern "C" { #define _MONITOR_JUMP_BACKWARD 483 #define _MONITOR_RESUME 484 #define _NOP NOP -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 485 +#define _POP_CALL 485 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 +#define _POP_CALL_ONE 487 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 +#define _POP_CALL_TWO 489 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 486 -#define _POP_JUMP_IF_TRUE 487 +#define _POP_JUMP_IF_FALSE 491 +#define _POP_JUMP_IF_TRUE 492 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 488 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 489 -#define _POP_TWO 490 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 491 +#define _POP_TOP_LOAD_CONST_INLINE 493 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 +#define _POP_TWO 495 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 492 +#define _PUSH_FRAME 497 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 493 -#define _PY_FRAME_GENERAL 494 -#define _PY_FRAME_KW 495 -#define _QUICKEN_RESUME 496 -#define _REPLACE_WITH_TRUE 497 +#define _PUSH_NULL_CONDITIONAL 498 +#define _PY_FRAME_GENERAL 499 +#define _PY_FRAME_KW 500 +#define _QUICKEN_RESUME 501 +#define _REPLACE_WITH_TRUE 502 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 498 -#define _SEND 499 -#define _SEND_GEN_FRAME 500 +#define _SAVE_RETURN_OFFSET 503 +#define _SEND 504 +#define _SEND_GEN_FRAME 505 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 501 -#define _STORE_ATTR 502 -#define _STORE_ATTR_INSTANCE_VALUE 503 -#define _STORE_ATTR_SLOT 504 -#define _STORE_ATTR_WITH_HINT 505 +#define _START_EXECUTOR 506 +#define _STORE_ATTR 507 +#define _STORE_ATTR_INSTANCE_VALUE 508 +#define _STORE_ATTR_SLOT 509 +#define _STORE_ATTR_WITH_HINT 510 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 506 -#define _STORE_FAST_0 507 -#define _STORE_FAST_1 508 -#define _STORE_FAST_2 509 -#define _STORE_FAST_3 510 -#define _STORE_FAST_4 511 -#define _STORE_FAST_5 512 -#define _STORE_FAST_6 513 -#define _STORE_FAST_7 514 +#define _STORE_FAST 511 +#define _STORE_FAST_0 512 +#define _STORE_FAST_1 513 +#define _STORE_FAST_2 514 +#define _STORE_FAST_3 515 +#define _STORE_FAST_4 516 +#define _STORE_FAST_5 517 +#define _STORE_FAST_6 518 +#define _STORE_FAST_7 519 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 515 -#define _STORE_SUBSCR 516 -#define _STORE_SUBSCR_DICT 517 -#define _STORE_SUBSCR_LIST_INT 518 +#define _STORE_SLICE 520 +#define _STORE_SUBSCR 521 +#define _STORE_SUBSCR_DICT 522 +#define _STORE_SUBSCR_LIST_INT 523 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 519 -#define _TO_BOOL 520 +#define _TIER2_RESUME_CHECK 524 +#define _TO_BOOL 525 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 521 +#define _TO_BOOL_LIST 526 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 522 +#define _TO_BOOL_STR 527 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 523 -#define _UNPACK_SEQUENCE_LIST 524 -#define _UNPACK_SEQUENCE_TUPLE 525 -#define _UNPACK_SEQUENCE_TWO_TUPLE 526 +#define _UNPACK_SEQUENCE 528 +#define _UNPACK_SEQUENCE_LIST 529 +#define _UNPACK_SEQUENCE_TUPLE 530 +#define _UNPACK_SEQUENCE_TWO_TUPLE 531 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 526 +#define MAX_UOP_ID 531 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 725238228a3dbc..5ebe124983b862 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -303,9 +303,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_CALL] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO] = HAS_ESCAPES_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, @@ -557,6 +562,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_MAYBE_EXPAND_METHOD] = "_MAYBE_EXPAND_METHOD", [_MAYBE_EXPAND_METHOD_KW] = "_MAYBE_EXPAND_METHOD_KW", [_NOP] = "_NOP", + [_POP_CALL] = "_POP_CALL", + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_ONE] = "_POP_CALL_ONE", + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_TWO] = "_POP_CALL_TWO", [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_TOP] = "_POP_TOP", @@ -1194,10 +1204,20 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_CONST_INLINE_BORROW: return 0; + case _POP_CALL: + return 2; + case _POP_CALL_ONE: + return 3; + case _POP_CALL_TWO: + return 4; case _POP_TOP_LOAD_CONST_INLINE_BORROW: return 1; case _POP_TWO_LOAD_CONST_INLINE_BORROW: return 2; + case _POP_CALL_LOAD_CONST_INLINE_BORROW: + return 2; + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: + return 3; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; case _LOAD_CONST_UNDER_INLINE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0bc0e1b212b6b8..cb6eae484149ee 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2002,7 +2002,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_list_append(self): def testfunc(n): @@ -2035,7 +2038,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_is_false(self): def testfunc(n): @@ -2053,7 +2059,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_FALSE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_subclass(self): def testfunc(n): @@ -2071,7 +2080,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_unknown_object(self): def testfunc(n): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst new file mode 100644 index 00000000000000..fdb6a2fe0b383b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst @@ -0,0 +1 @@ +Optimize ``_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW``. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 652bda9c182e49..be22c7446f5402 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5287,18 +5287,54 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 op(_POP_CALL, (callable, null --)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_ONE, (callable, null, pop --)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_TWO, (callable, null, pop1, pop2 --)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { + tier2 op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { + tier2 op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null -- value)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop -- value)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); (void)null; // Silence compiler warnings about unused variables diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fcde31a30126a4..40090e692e4a72 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7030,6 +7030,69 @@ break; } + case _POP_CALL: { + _PyStackRef null; + _PyStackRef callable; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_ONE: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_TWO: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop; _PyStackRef value; @@ -7071,6 +7134,53 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop2; _PyStackRef pop1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5c50228a13b2d1..851e1efa0497af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -552,11 +552,13 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_TWO, }; const bool op_skip[MAX_UOP_ID + 1] = { [_NOP] = true, [_CHECK_VALIDITY] = true, + [_SET_IP] = true, }; const uint16_t op_without_pop[MAX_UOP_ID + 1] = { @@ -565,6 +567,15 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, [_POP_TWO] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO] = _POP_CALL_ONE, + [_POP_CALL_ONE] = _POP_CALL, +}; + +const uint16_t op_without_pop_null[MAX_UOP_ID + 1] = { + [_POP_CALL] = _POP_TOP, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -601,19 +612,29 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode]) { + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; } - if (!op_without_push[last->opcode]) { - break; + if (op_without_push[last->opcode]) { + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } } - last->opcode = op_without_push[last->opcode]; - opcode = buffer[pc].opcode = op_without_pop[opcode]; - if (op_without_pop[last->opcode]) { - opcode = last->opcode; - pc = last - buffer; + else if (last->opcode == _PUSH_NULL) { + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and + // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). + last->opcode = _NOP; + opcode = buffer[pc].opcode = op_without_pop_null[opcode]; + assert(opcode); + } + else { + break; } } /* _PUSH_FRAME doesn't escape or error, but it diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0b6bbd133d6ac9..49c6bfb6c1b01a 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -551,6 +551,14 @@ dummy_func(void) { value = sym_new_const(ctx, ptr); } + op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + + op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { value = sym_new_const(ctx, ptr); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5a9fcf3b1b6924..bf7ac72d4579e7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2601,6 +2601,24 @@ break; } + case _POP_CALL: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO: { + stack_pointer += -4; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; @@ -2618,6 +2636,26 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-2] = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-3] = value; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; From ac06b534eed08ae42eea89e3854e1e90e8231318 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 May 2025 11:24:09 -0600 Subject: [PATCH 301/989] gh-132775: Fix Recently Introduced Warnings (gh-134530) --- Modules/_interpchannelsmodule.c | 2 +- Modules/_interpqueuesmodule.c | 2 +- Python/crossinterp.c | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 0ab553190001bd..bfd805bf5e4072 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3443,7 +3443,7 @@ channelsmod_get_channel_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t cid = cid_data.cid; - struct _channeldefaults defaults; + struct _channeldefaults defaults = {0}; int err = channel_get_defaults(&_globals.channels, cid, &defaults); if (handle_channel_error(err, self, cid)) { return NULL; diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 816285c9eff44a..ffc52c8ee74d85 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1772,7 +1772,7 @@ queuesmod_get_queue_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t qid = qidarg.id; - struct _queuedefaults defaults; + struct _queuedefaults defaults = {0}; int err = queue_get_defaults(&_globals.queues, qid, &defaults); if (handle_queue_error(err, self, qid)) { return NULL; diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 65ccab32daf730..26eecdddf4bdd0 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -1795,6 +1795,7 @@ typedef struct _sharednsitem { // in a different interpreter to release the XI data. } _PyXI_namespace_item; +#ifndef NDEBUG static int _sharednsitem_is_initialized(_PyXI_namespace_item *item) { @@ -1803,6 +1804,7 @@ _sharednsitem_is_initialized(_PyXI_namespace_item *item) } return 0; } +#endif static int _sharednsitem_init(_PyXI_namespace_item *item, PyObject *key) From d1ea8edbd797e2cde7446da36d07041c6ddbcb53 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Thu, 22 May 2025 14:05:43 -0400 Subject: [PATCH 302/989] GH-131798: Optimize away isinstance calls in the JIT (GH-134369) From 742d5b5c5d75eae44c66a43ebfa24a4f286ea8a1 Mon Sep 17 00:00:00 2001 From: Kattni Date: Thu, 22 May 2025 14:26:47 -0400 Subject: [PATCH 303/989] Docs: Add note to tutorial clarifying scope (#134534) * Add note to tutorial * Update formatting --- Doc/tutorial/index.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst index 96791f88c867ab..d0bf77dc40d0a1 100644 --- a/Doc/tutorial/index.rst +++ b/Doc/tutorial/index.rst @@ -4,6 +4,10 @@ The Python Tutorial ###################### +.. Tip:: This tutorial is designed for + *programmers* that are new to the Python language, + **not** *beginners* who are new to programming. + Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, @@ -21,7 +25,8 @@ implemented in C or C++ (or other languages callable from C). Python is also suitable as an extension language for customizable applications. This tutorial introduces the reader informally to the basic concepts and -features of the Python language and system. It helps to have a Python +features of the Python language and system. Be aware that it expects you to +have a basic understanding of programming in general. It helps to have a Python interpreter handy for hands-on experience, but all examples are self-contained, so the tutorial can be read off-line as well. From ad42dc1909bdf8ec775b63fb22ed48ff42797a17 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 22 May 2025 14:08:44 -0700 Subject: [PATCH 304/989] GH-130397: remove special-casing of C stack depth for WASI (#134469) Removed special-casing for WASI when setting C stack depth limits. Since WASI has its own C stack checking this isn't a security risk. Also disabled some tests that stopped passing. They all happened to have already been disabled under Emscripten. --- Include/pythonrun.h | 3 --- Lib/test/test_copy.py | 3 +++ Lib/test/test_descr.py | 2 ++ Lib/test/test_exception_group.py | 4 +++- Lib/test/test_isinstance.py | 2 ++ Lib/test/test_json/test_recursion.py | 2 ++ .../2025-05-21-15-14-32.gh-issue-130397.aG6EON.rst | 3 +++ Python/ceval.c | 3 --- 8 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-15-14-32.gh-issue-130397.aG6EON.rst diff --git a/Include/pythonrun.h b/Include/pythonrun.h index fad2b3c77476e4..716c4ab3c81cae 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -29,9 +29,6 @@ PyAPI_DATA(int) (*PyOS_InputHook)(void); # define PYOS_LOG2_STACK_MARGIN 12 #elif defined(Py_DEBUG) && defined(WIN32) # define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really a size */ -# define PYOS_LOG2_STACK_MARGIN 9 #else # define PYOS_LOG2_STACK_MARGIN 11 #endif diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 3cb8072abefe98..467ec09d99e462 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -372,6 +372,7 @@ def test_deepcopy_list(self): self.assertIsNot(x[0], y[0]) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_deepcopy_reflexive_list(self): x = [] x.append(x) @@ -400,6 +401,7 @@ def test_deepcopy_tuple_of_immutables(self): self.assertIs(x, y) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_deepcopy_reflexive_tuple(self): x = ([],) x[0].append(x) @@ -418,6 +420,7 @@ def test_deepcopy_dict(self): self.assertIsNot(x["foo"], y["foo"]) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_deepcopy_reflexive_dict(self): x = {} x['foo'] = x diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index d635855f537af0..ea076ba4fef2db 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3943,6 +3943,7 @@ def __del__(self): del C.__del__ @unittest.skipIf(support.is_emscripten, "Seems to works in Pyodide?") + @support.skip_wasi_stack_overflow() def test_slots_trash(self): # Testing slot trash... # Deallocating deeply nested slotted trash caused stack overflows @@ -4868,6 +4869,7 @@ class Thing: deque.append(thing, thing) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_repr_as_str(self): # Issue #11603: crash or infinite loop when rebinding __str__ as # __repr__. diff --git a/Lib/test/test_exception_group.py b/Lib/test/test_exception_group.py index 242d7ced007b10..5df2c41c6b56bc 100644 --- a/Lib/test/test_exception_group.py +++ b/Lib/test/test_exception_group.py @@ -1,7 +1,7 @@ import collections.abc import types import unittest -from test.support import skip_emscripten_stack_overflow, exceeds_recursion_limit +from test.support import skip_emscripten_stack_overflow, skip_wasi_stack_overflow, exceeds_recursion_limit class TestExceptionGroupTypeHierarchy(unittest.TestCase): def test_exception_group_types(self): @@ -465,12 +465,14 @@ def make_deep_eg(self): return e @skip_emscripten_stack_overflow() + @skip_wasi_stack_overflow() def test_deep_split(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): e.split(TypeError) @skip_emscripten_stack_overflow() + @skip_wasi_stack_overflow() def test_deep_subgroup(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index daad00e86432d0..f440fc28ee7b7d 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -318,6 +318,7 @@ def __bases__(self): self.assertRaises(RecursionError, isinstance, 1, X()) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_infinite_recursion_via_bases_tuple(self): """Regression test for bpo-30570.""" class Failure(object): @@ -328,6 +329,7 @@ def __getattr__(self, attr): issubclass(Failure(), int) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_infinite_cycle_in_bases(self): """Regression test for bpo-30570.""" class X: diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index d82093f3895167..8f0e5e078ed0d4 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -69,6 +69,7 @@ def default(self, o): @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_highly_nested_objects_decoding(self): very_deep = 200000 # test that loading highly-nested objects doesn't segfault when C @@ -98,6 +99,7 @@ def test_highly_nested_objects_encoding(self): self.dumps(d) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_endless_recursion(self): # See #12051 class EndlessJSONEncoder(self.json.JSONEncoder): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-15-14-32.gh-issue-130397.aG6EON.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-15-14-32.gh-issue-130397.aG6EON.rst new file mode 100644 index 00000000000000..34a2f4d1278e74 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-21-15-14-32.gh-issue-130397.aG6EON.rst @@ -0,0 +1,3 @@ +Remove special-casing for C stack depth limits for WASI. Due to +WebAssembly's built-in stack protection this does not pose a security +concern. diff --git a/Python/ceval.c b/Python/ceval.c index 490b653f132a6a..d1b6b9f1a1a35f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -360,9 +360,6 @@ _Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) # define Py_C_STACK_SIZE 1200000 #elif defined(__sparc__) # define Py_C_STACK_SIZE 1600000 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really the stack depth */ -# define Py_C_STACK_SIZE 131072 // wasi-libc DEFAULT_STACK_SIZE #elif defined(__hppa__) || defined(__powerpc64__) # define Py_C_STACK_SIZE 2000000 #else From 9b292ff0223dab0f999d5c1e13355552fd7cd528 Mon Sep 17 00:00:00 2001 From: Josh Cannon Date: Thu, 22 May 2025 16:32:00 -0500 Subject: [PATCH 305/989] Avoid `__file__` in `hashlib` example (GH-134540) --- Doc/library/hashlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index bb2d2fad23bdb8..4818a4944a512a 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -284,7 +284,7 @@ a file or file-like object. Example: >>> import io, hashlib, hmac - >>> with open(hashlib.__file__, "rb") as f: + >>> with open("library/hashlib.rst", "rb") as f: ... digest = hashlib.file_digest(f, "sha256") ... >>> digest.hexdigest() # doctest: +ELLIPSIS From 2da2be4b8400d6de8709e68ca62408af164ee291 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 22 May 2025 14:41:50 -0700 Subject: [PATCH 306/989] GH-131769: fix detecting a pydebug build of the build Python when building for WASI (GH-134015) --- ...-05-14-09-43-48.gh-issue-131769.H0oy5x.rst | 1 + Tools/wasm/wasi/__main__.py | 22 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-14-09-43-48.gh-issue-131769.H0oy5x.rst diff --git a/Misc/NEWS.d/next/Build/2025-05-14-09-43-48.gh-issue-131769.H0oy5x.rst b/Misc/NEWS.d/next/Build/2025-05-14-09-43-48.gh-issue-131769.H0oy5x.rst new file mode 100644 index 00000000000000..834b0d9f7e145d --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-14-09-43-48.gh-issue-131769.H0oy5x.rst @@ -0,0 +1 @@ +Fix detecting when the build Python in a cross-build is a pydebug build. diff --git a/Tools/wasm/wasi/__main__.py b/Tools/wasm/wasi/__main__.py index ba5faeb9e20c66..54ccc95157d57d 100644 --- a/Tools/wasm/wasi/__main__.py +++ b/Tools/wasm/wasi/__main__.py @@ -112,7 +112,7 @@ def call(command, *, quiet, **kwargs): def build_platform(): """The name of the build/host platform.""" - # Can also be found via `config.guess`.` + # Can also be found via `config.guess`. return sysconfig.get_config_var("BUILD_GNU_TYPE") @@ -128,6 +128,15 @@ def build_python_path(): return binary +def build_python_is_pydebug(): + """Find out if the build Python is a pydebug build.""" + test = "import sys, test.support; sys.exit(test.support.Py_DEBUG)" + result = subprocess.run([build_python_path(), "-c", test], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + return bool(result.returncode) + + @subdir(BUILD_DIR, clean_ok=True) def configure_build_python(context, working_dir): """Configure the build/host Python.""" @@ -214,18 +223,15 @@ def configure_wasi_python(context, working_dir): lib_dirs = list(python_build_dir.glob("lib.*")) assert len(lib_dirs) == 1, f"Expected a single lib.* directory in {python_build_dir}" lib_dir = os.fsdecode(lib_dirs[0]) - pydebug = lib_dir.endswith("-pydebug") - python_version = lib_dir.removesuffix("-pydebug").rpartition("-")[-1] - sysconfig_data = f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}" - if pydebug: - sysconfig_data += "-pydebug" + python_version = lib_dir.rpartition("-")[-1] + sysconfig_data_dir = f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}" # Use PYTHONPATH to include sysconfig data which must be anchored to the # WASI guest's `/` directory. args = {"GUEST_DIR": "/", "HOST_DIR": CHECKOUT, "ENV_VAR_NAME": "PYTHONPATH", - "ENV_VAR_VALUE": f"/{sysconfig_data}", + "ENV_VAR_VALUE": f"/{sysconfig_data_dir}", "PYTHON_WASM": working_dir / "python.wasm"} # Check dynamically for wasmtime in case it was specified manually via # `--host-runner`. @@ -245,7 +251,7 @@ def configure_wasi_python(context, working_dir): f"--host={context.host_triple}", f"--build={build_platform()}", f"--with-build-python={build_python}"] - if pydebug: + if build_python_is_pydebug(): configure.append("--with-pydebug") if context.args: configure.extend(context.args) From a3d0306ca08d09b1417d72df62e3b9d775143031 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Thu, 22 May 2025 23:54:56 +0100 Subject: [PATCH 307/989] gh-128307: Update docs for asyncio.create_task, TaskGroup.create_task, asyncio.create_task (#134202) --- Doc/library/asyncio-eventloop.rst | 25 +++++++++++++++++++++++-- Doc/library/asyncio-task.rst | 21 +++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 21f7d0547af1dd..91970c282391f7 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -361,7 +361,7 @@ Creating Futures and Tasks .. versionadded:: 3.5.2 -.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None) +.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Schedule the execution of :ref:`coroutine ` *coro*. Return a :class:`Task` object. @@ -370,6 +370,10 @@ Creating Futures and Tasks for interoperability. In this case, the result type is a subclass of :class:`Task`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. + If the *name* argument is provided and not ``None``, it is set as the name of the task using :meth:`Task.set_name`. @@ -388,8 +392,15 @@ Creating Futures and Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.13.3 + Added ``kwargs`` which passes on arbitrary extra parameters, including ``name`` and ``context``. + + .. versionchanged:: 3.13.4 + Rolled back the change that passes on *name* and *context* (if it is None), + while still passing on other arbitrary keyword arguments (to avoid breaking backwards compatibility with 3.13.3). + .. versionchanged:: 3.14 - Added the *eager_start* parameter. + All *kwargs* are now passed on. The *eager_start* parameter works with eager task factories. .. method:: loop.set_task_factory(factory) @@ -402,6 +413,16 @@ Creating Futures and Tasks event loop, and *coro* is a coroutine object. The callable must pass on all *kwargs*, and return a :class:`asyncio.Task`-compatible object. + .. versionchanged:: 3.13.3 + Required that all *kwargs* are passed on to :class:`asyncio.Task`. + + .. versionchanged:: 3.13.4 + *name* is no longer passed to task factories. *context* is no longer passed + to task factories if it is ``None``. + + .. versionchanged:: 3.14 + *name* and *context* are now unconditionally passed on to task factories again. + .. method:: loop.get_task_factory() Return a task factory or ``None`` if the default one is in use. diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 59acce1990ae04..b19ffa8213a971 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -238,18 +238,24 @@ Creating Tasks ----------------------------------------------- -.. function:: create_task(coro, *, name=None, context=None) +.. function:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Wrap the *coro* :ref:`coroutine ` into a :class:`Task` and schedule its execution. Return the Task object. - If *name* is not ``None``, it is set as the name of the task using - :meth:`Task.set_name`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. An optional keyword-only *context* argument allows specifying a custom :class:`contextvars.Context` for the *coro* to run in. The current context copy is created when no *context* is provided. + An optional keyword-only *eager_start* argument allows specifying + if the task should execute eagerly during the call to create_task, + or be scheduled later. If *eager_start* is not passed the mode set + by :meth:`loop.set_task_factory` will be used. + The task is executed in the loop returned by :func:`get_running_loop`, :exc:`RuntimeError` is raised if there is no running loop in current thread. @@ -290,6 +296,9 @@ Creating Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.14 + Added the *eager_start* parameter by passing on all *kwargs*. + Task Cancellation ================= @@ -330,7 +339,7 @@ and reliable way to wait for all tasks in the group to finish. .. versionadded:: 3.11 - .. method:: create_task(coro, *, name=None, context=None) + .. method:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Create a task in this task group. The signature matches that of :func:`asyncio.create_task`. @@ -342,6 +351,10 @@ and reliable way to wait for all tasks in the group to finish. Close the given coroutine if the task group is not active. + .. versionchanged:: 3.14 + + Passes on all *kwargs* to :meth:`loop.create_task` + Example:: async def main(): From a7ed9dfcbe1436361b95a5f9d89a2d1f075e5468 Mon Sep 17 00:00:00 2001 From: Cheery Date: Fri, 23 May 2025 01:37:20 +0100 Subject: [PATCH 308/989] Docs: fix link in free-threading-python.rst (#134548) --- Doc/howto/free-threading-python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index f7a894ac2cd78e..c33cef2c8e935b 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -32,7 +32,7 @@ optionally support installing free-threaded Python binaries. The installers are available at https://www.python.org/downloads/. For information on other platforms, see the `Installing a Free-Threaded Python -`_, a +`_, a community-maintained installation guide for installing free-threaded Python. When building CPython from source, the :option:`--disable-gil` configure option From 8dbc11971974a725dc8a11c0dc65d8f6fcb4d902 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Thu, 22 May 2025 20:30:10 -0700 Subject: [PATCH 309/989] gh-133885: Use locks instead of critical sections for _zstd (gh-134289) Move from using critical sections to locks for the (de)compression methods. Since the methods allow other threads to run, we should use a lock rather than a critical section. --- Lib/test/test_zstd.py | 56 +++++++++- Modules/_zstd/clinic/decompressor.c.h | 11 +- Modules/_zstd/clinic/zstddict.c.h | 27 +---- Modules/_zstd/compressor.c | 151 ++++++++++++++------------ Modules/_zstd/decompressor.c | 150 +++++++++++++------------ Modules/_zstd/zstddict.c | 13 ++- Modules/_zstd/zstddict.h | 3 + 7 files changed, 229 insertions(+), 182 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 53ca592ea38828..084f8f24fc009c 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -2430,10 +2430,8 @@ def test_buffer_protocol(self): self.assertEqual(f.write(arr), LENGTH) self.assertEqual(f.tell(), LENGTH) -@unittest.skip("it fails for now, see gh-133885") class FreeThreadingMethodTests(unittest.TestCase): - @unittest.skipUnless(Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_compress_locking(self): @@ -2470,7 +2468,6 @@ def run_method(method, input_data, output_data): actual = b''.join(output) + rest2 self.assertEqual(expected, actual) - @unittest.skipUnless(Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_decompress_locking(self): @@ -2506,6 +2503,59 @@ def run_method(method, input_data, output_data): actual = b''.join(output) self.assertEqual(expected, actual) + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_compress_shared_dict(self): + num_threads = 8 + + def run_method(b): + level = threading.get_ident() % 4 + # sync threads to increase chance of contention on + # capsule storing dictionary levels + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_digested_dict) + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_undigested_dict) + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_prefix) + threads = [] + + b = threading.Barrier(num_threads) + for i in range(num_threads): + thread = threading.Thread(target=run_method, args=(b,)) + + threads.append(thread) + + with threading_helper.start_threads(threads): + pass + + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_decompress_shared_dict(self): + num_threads = 8 + + def run_method(b): + # sync threads to increase chance of contention on + # decompression dictionary + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_digested_dict) + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_undigested_dict) + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_prefix) + threads = [] + + b = threading.Barrier(num_threads) + for i in range(num_threads): + thread = threading.Thread(target=run_method, args=(b,)) + + threads.append(thread) + + with threading_helper.start_threads(threads): + pass if __name__ == "__main__": diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h index 4ecb19e9bde6ed..c6fdae74ab0447 100644 --- a/Modules/_zstd/clinic/decompressor.c.h +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -7,7 +7,6 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_abstract.h" // _PyNumber_Index() -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, @@ -114,13 +113,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); static PyObject * _zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); } PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, @@ -227,4 +220,4 @@ _zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssiz return return_value; } -/*[clinic end generated code: output=7a4d278f9244e684 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 34e0e4b3ecfe72..aaa29e491bc1bb 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -6,7 +6,6 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, @@ -118,13 +117,7 @@ _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, @@ -156,13 +149,7 @@ _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, @@ -194,12 +181,6 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=bfb31c1187477afd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8692eabee4e0d1fe input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 38baee2be1e95b..8f934858ef784f 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -17,6 +17,7 @@ class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" #include "zstddict.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include // offsetof() #include // ZSTD_*() @@ -38,6 +39,9 @@ typedef struct { /* Compression level */ int compression_level; + + /* Lock to protect the compression context */ + PyMutex lock; } ZstdCompressor; #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) @@ -149,12 +153,12 @@ capsule_free_cdict(PyObject *capsule) ZSTD_CDict * _get_CDict(ZstdDict *self, int compressionLevel) { + assert(PyMutex_IsLocked(&self->lock)); PyObject *level = NULL; - PyObject *capsule; + PyObject *capsule = NULL; ZSTD_CDict *cdict; + int ret; - // TODO(emmatyping): refactor critical section code into a lock_held function - Py_BEGIN_CRITICAL_SECTION(self); /* int level object */ level = PyLong_FromLong(compressionLevel); @@ -163,12 +167,11 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Get PyCapsule object from self->c_dicts */ - capsule = PyDict_GetItemWithError(self->c_dicts, level); + ret = PyDict_GetItemRef(self->c_dicts, level, &capsule); + if (ret < 0) { + goto error; + } if (capsule == NULL) { - if (PyErr_Occurred()) { - goto error; - } - /* Create ZSTD_CDict instance */ char *dict_buffer = PyBytes_AS_STRING(self->dict_content); Py_ssize_t dict_len = Py_SIZE(self->dict_content); @@ -196,11 +199,10 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Add PyCapsule object to self->c_dicts */ - if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { - Py_DECREF(capsule); + ret = PyDict_SetItem(self->c_dicts, level, capsule); + if (ret < 0) { goto error; } - Py_DECREF(capsule); } else { /* ZSTD_CDict instance already exists */ @@ -212,15 +214,55 @@ _get_CDict(ZstdDict *self, int compressionLevel) cdict = NULL; success: Py_XDECREF(level); - Py_END_CRITICAL_SECTION(); + Py_XDECREF(capsule); return cdict; } static int -_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) { - size_t zstd_ret; + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_CDict */ + ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); + if (c_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary. + It overrides some compression context's parameters. */ + zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary. + It doesn't override compression context's parameters. */ + zstd_ret = ZSTD_CCtx_loadDictionary( + self->cctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + zstd_ret = ZSTD_CCtx_refPrefix( + self->cctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + } + else { + Py_UNREACHABLE(); + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret); + return -1; + } + return 0; +} + +static int +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; @@ -237,7 +279,10 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) /* When compressing, use undigested dictionary by default. */ zd = (ZstdDict*)dict; type = DICT_TYPE_UNDIGESTED; - goto load; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } /* Check (ZstdDict, type) */ @@ -257,7 +302,10 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { assert(type >= 0); zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } } } @@ -266,49 +314,6 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) PyErr_SetString(PyExc_TypeError, "zstd_dict argument should be ZstdDict object."); return -1; - -load: - if (type == DICT_TYPE_DIGESTED) { - /* Get ZSTD_CDict */ - ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); - if (c_dict == NULL) { - return -1; - } - /* Reference a prepared dictionary. - It overrides some compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION(self); - zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); - Py_END_CRITICAL_SECTION(); - } - else if (type == DICT_TYPE_UNDIGESTED) { - /* Load a dictionary. - It doesn't override compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); - } - else if (type == DICT_TYPE_PREFIX) { - /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); - } - else { - Py_UNREACHABLE(); - } - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret); - return -1; - } - return 0; } /*[clinic input] @@ -339,6 +344,7 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, self->use_multithread = 0; self->dict = NULL; + self->lock = (PyMutex){0}; /* Compression context */ self->cctx = ZSTD_createCCtx(); @@ -403,6 +409,8 @@ ZstdCompressor_dealloc(PyObject *ob) ZSTD_freeCCtx(self->cctx); } + assert(!PyMutex_IsLocked(&self->lock)); + /* Py_XDECREF the dict after free the compression context */ Py_CLEAR(self->dict); @@ -412,9 +420,10 @@ ZstdCompressor_dealloc(PyObject *ob) } static PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive) +compress_lock_held(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -495,8 +504,9 @@ mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) #endif static PyObject * -compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) +compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -529,7 +539,7 @@ compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) goto error; } - /* Like compress_impl(), output as much as possible. */ + /* Like compress_lock_held(), output as much as possible. */ if (out.pos == out.size) { if (_OutputBuffer_Grow(&buffer, &out) < 0) { goto error; @@ -588,14 +598,14 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); /* Compress */ if (self->use_multithread && mode == ZSTD_e_continue) { - ret = compress_mt_continue_impl(self, data); + ret = compress_mt_continue_lock_held(self, data); } else { - ret = compress_impl(self, data, mode); + ret = compress_lock_held(self, data, mode); } if (ret) { @@ -607,7 +617,7 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } @@ -642,8 +652,9 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - ret = compress_impl(self, NULL, mode); + PyMutex_Lock(&self->lock); + + ret = compress_lock_held(self, NULL, mode); if (ret) { self->last_mode = mode; @@ -654,7 +665,7 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 58f9c9f804e549..e299f73b071353 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -17,6 +17,7 @@ class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" #include "zstddict.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include // bool #include // offsetof() @@ -45,6 +46,9 @@ typedef struct { /* For ZstdDecompressor, 0 or 1. 1 means the end of the first frame has been reached. */ bool eof; + + /* Lock to protect the decompression context */ + PyMutex lock; } ZstdDecompressor; #define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) @@ -54,6 +58,7 @@ typedef struct { static inline ZSTD_DDict * _get_DDict(ZstdDict *self) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_DDict *ret; /* Already created */ @@ -61,15 +66,14 @@ _get_DDict(ZstdDict *self) return self->d_dict; } - Py_BEGIN_CRITICAL_SECTION(self); if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ char *dict_buffer = PyBytes_AS_STRING(self->dict_content); Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - self->d_dict = ZSTD_createDDict(dict_buffer, - dict_len); + ret = ZSTD_createDDict(dict_buffer, dict_len); Py_END_ALLOW_THREADS + self->d_dict = ret; if (self->d_dict == NULL) { _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); @@ -81,11 +85,7 @@ _get_DDict(ZstdDict *self) } } - /* Don't lose any exception */ - ret = self->d_dict; - Py_END_CRITICAL_SECTION(); - - return ret; + return self->d_dict; } /* Set decompression parameters to decompression context */ @@ -134,9 +134,7 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) } /* Set parameter to compression context */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); - Py_END_CRITICAL_SECTION(); /* Check error */ if (ZSTD_isError(zstd_ret)) { @@ -147,11 +145,53 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) return 0; } +static int +_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) +{ + size_t zstd_ret; + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_DDict */ + ZSTD_DDict *d_dict = _get_DDict(zd); + if (d_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary */ + zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary */ + zstd_ret = ZSTD_DCtx_loadDictionary( + self->dctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + zstd_ret = ZSTD_DCtx_refPrefix( + self->dctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + } + else { + /* Impossible code path */ + PyErr_SetString(PyExc_SystemError, + "load_d_dict() impossible code path"); + return -1; + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret); + return -1; + } + return 0; +} + /* Load dictionary or prefix to decompression context */ static int _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) { - size_t zstd_ret; _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; @@ -168,7 +208,10 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) /* When decompressing, use digested dictionary by default. */ zd = (ZstdDict*)dict; type = DICT_TYPE_DIGESTED; - goto load; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } /* Check (ZstdDict, type) */ @@ -188,7 +231,10 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) { assert(type >= 0); zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } } } @@ -197,50 +243,6 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) PyErr_SetString(PyExc_TypeError, "zstd_dict argument should be ZstdDict object."); return -1; - -load: - if (type == DICT_TYPE_DIGESTED) { - /* Get ZSTD_DDict */ - ZSTD_DDict *d_dict = _get_DDict(zd); - if (d_dict == NULL) { - return -1; - } - /* Reference a prepared dictionary */ - Py_BEGIN_CRITICAL_SECTION(self); - zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); - Py_END_CRITICAL_SECTION(); - } - else if (type == DICT_TYPE_UNDIGESTED) { - /* Load a dictionary */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); - } - else if (type == DICT_TYPE_PREFIX) { - /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); - } - else { - /* Impossible code path */ - PyErr_SetString(PyExc_SystemError, - "load_d_dict() impossible code path"); - return -1; - } - - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret); - return -1; - } - return 0; } /* @@ -268,8 +270,8 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) Note, decompressing "an empty input" in any case will make it > 0. */ static PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length) +decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; @@ -339,10 +341,9 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, } static void -decompressor_reset_session(ZstdDecompressor *self) +decompressor_reset_session_lock_held(ZstdDecompressor *self) { - // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here - // and ensure lock is always held + assert(PyMutex_IsLocked(&self->lock)); /* Reset variables */ self->in_begin = 0; @@ -359,8 +360,10 @@ decompressor_reset_session(ZstdDecompressor *self) } static PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) +stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, + Py_ssize_t max_length) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; @@ -456,7 +459,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, max_length); + ret = decompress_lock_held(self, &in, max_length); if (ret == NULL) { goto error; } @@ -517,7 +520,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length error: /* Reset decompressor's states/session */ - decompressor_reset_session(self); + decompressor_reset_session_lock_held(self); Py_CLEAR(ret); return NULL; @@ -555,6 +558,7 @@ _zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, self->unused_data = NULL; self->eof = 0; self->dict = NULL; + self->lock = (PyMutex){0}; /* needs_input flag */ self->needs_input = 1; @@ -608,6 +612,8 @@ ZstdDecompressor_dealloc(PyObject *ob) ZSTD_freeDCtx(self->dctx); } + assert(!PyMutex_IsLocked(&self->lock)); + /* Py_CLEAR the dict after free decompression context */ Py_CLEAR(self->dict); @@ -623,7 +629,6 @@ ZstdDecompressor_dealloc(PyObject *ob) } /*[clinic input] -@critical_section @getter _zstd.ZstdDecompressor.unused_data @@ -635,11 +640,14 @@ decompressed, unused input data after the frame. Otherwise this will be b''. static PyObject * _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) -/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/ +/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/ { PyObject *ret; + PyMutex_Lock(&self->lock); + if (!self->eof) { + PyMutex_Unlock(&self->lock); return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else { @@ -656,6 +664,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) } } + PyMutex_Unlock(&self->lock); return ret; } @@ -693,10 +702,9 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, { PyObject *ret; /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - - ret = stream_decompress(self, data, max_length); - Py_END_CRITICAL_SECTION(); + PyMutex_Lock(&self->lock); + ret = stream_decompress_lock_held(self, data, max_length); + PyMutex_Unlock(&self->lock); return ret; } diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 7df187a6fa69d7..39828c9b36b5c2 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -17,6 +17,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" #include "_zstdmodule.h" #include "zstddict.h" #include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() @@ -53,6 +54,7 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, self->dict_content = NULL; self->d_dict = NULL; self->dict_id = 0; + self->lock = (PyMutex){0}; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -109,6 +111,8 @@ ZstdDict_dealloc(PyObject *ob) ZSTD_freeDDict(self->d_dict); } + assert(!PyMutex_IsLocked(&self->lock)); + /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ Py_CLEAR(self->dict_content); Py_CLEAR(self->c_dicts); @@ -143,7 +147,6 @@ static PyMemberDef ZstdDict_members[] = { }; /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_digested_dict @@ -160,13 +163,12 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=10cd2b6165931b77]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_undigested_dict @@ -181,13 +183,12 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=11e5f5df690a85b4]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_prefix @@ -202,7 +203,7 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=b028e0ae6ec4292b]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h index e8a55a3670b869..dcba0f21852087 100644 --- a/Modules/_zstd/zstddict.h +++ b/Modules/_zstd/zstddict.h @@ -19,6 +19,9 @@ typedef struct { PyObject *dict_content; /* Dictionary id */ uint32_t dict_id; + + /* Lock to protect the digested dictionaries */ + PyMutex lock; } ZstdDict; #endif // !ZSTD_DICT_H From 5804ee7b467d86131be3ff7d569443efb0d0f9fd Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 23 May 2025 10:03:16 +0530 Subject: [PATCH 310/989] gh-114177: avoid calling connection lost callbacks when loop is already closed in asyncio subprocess (#134508) --- Lib/asyncio/base_subprocess.py | 7 ++++++- .../Library/2025-05-22-13-10-32.gh-issue-114177.3TYUJ3.rst | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-22-13-10-32.gh-issue-114177.3TYUJ3.rst diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 9c2ba679ce2bf1..d40af422e614c1 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -104,7 +104,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? diff --git a/Misc/NEWS.d/next/Library/2025-05-22-13-10-32.gh-issue-114177.3TYUJ3.rst b/Misc/NEWS.d/next/Library/2025-05-22-13-10-32.gh-issue-114177.3TYUJ3.rst new file mode 100644 index 00000000000000..c98fde5fb04f5c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-22-13-10-32.gh-issue-114177.3TYUJ3.rst @@ -0,0 +1 @@ +Fix :mod:`asyncio` to not close subprocess pipes which would otherwise error out when the event loop is already closed. From f9324cb3cb4d9bb9f0aef2e48b8afa895bde4b0d Mon Sep 17 00:00:00 2001 From: Evgeny Demchenko Date: Fri, 23 May 2025 08:15:21 +0300 Subject: [PATCH 311/989] gh-134451: Converted `asyncio.tools.CycleFoundException` from dataclass to a regular exception type. (#134513) --- Lib/asyncio/tools.py | 12 +++++++++--- .../2025-05-22-14-12-53.gh-issue-134451.M1rD-j.rst | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-22-14-12-53.gh-issue-134451.M1rD-j.rst diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index bf1cb5e64cbfbe..b2da7d2f6ba10c 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -13,11 +13,17 @@ class NodeType(Enum): TASK = 2 -@dataclass(frozen=True) class CycleFoundException(Exception): """Raised when there is a cycle when drawing the call tree.""" - cycles: list[list[int]] - id2name: dict[int, str] + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name + # ─── indexing helpers ─────────────────────────────────────────── diff --git a/Misc/NEWS.d/next/Library/2025-05-22-14-12-53.gh-issue-134451.M1rD-j.rst b/Misc/NEWS.d/next/Library/2025-05-22-14-12-53.gh-issue-134451.M1rD-j.rst new file mode 100644 index 00000000000000..3c8339f8842254 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-22-14-12-53.gh-issue-134451.M1rD-j.rst @@ -0,0 +1 @@ +Converted ``asyncio.tools.CycleFoundException`` from dataclass to a regular exception type. From 366d95d86239e62baf99b9cedb461e64d5dce057 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 23 May 2025 10:49:41 +0530 Subject: [PATCH 312/989] gh-133372: remove out of date todos from types module about generator wrapper (#134563) --- Lib/types.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/types.py b/Lib/types.py index 6efac3394345a5..cf0549315a7814 100644 --- a/Lib/types.py +++ b/Lib/types.py @@ -250,7 +250,6 @@ def deleter(self, fdel): class _GeneratorWrapper: - # TODO: Implement this in C. def __init__(self, gen): self.__wrapped = gen self.__isgen = gen.__class__ is GeneratorType @@ -305,7 +304,6 @@ def coroutine(func): # Check if 'func' is a generator function. # (0x20 == CO_GENERATOR) if co_flags & 0x20: - # TODO: Implement this in C. co = func.__code__ # 0x100 == CO_ITERABLE_COROUTINE func.__code__ = co.replace(co_flags=co.co_flags | 0x100) From da9b5c1c9c6ecea399b26175a2d048051d21aa04 Mon Sep 17 00:00:00 2001 From: Blaise Pabon Date: Fri, 23 May 2025 05:44:15 -0400 Subject: [PATCH 313/989] gh-106318: Add example for `str.center()` (#134518) --- Doc/library/stdtypes.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 3486a18b5cb1f0..ce613476854110 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1788,8 +1788,14 @@ expression support in the :mod:`re` module). Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is - returned if *width* is less than or equal to ``len(s)``. - + returned if *width* is less than or equal to ``len(s)``. For example:: + + >>> 'Python'.center(10) + ' Python ' + >>> 'Python'.center(10, '-') + '--Python--' + >>> 'Python'.center(4) + 'Python' .. method:: str.count(sub[, start[, end]]) From 17294680167d4b61dbf3d705f07b213b7c58e2dd Mon Sep 17 00:00:00 2001 From: Blaise Pabon Date: Fri, 23 May 2025 05:47:11 -0400 Subject: [PATCH 314/989] gh-106318: Add example for `str.count()` (#134519) --- Doc/library/stdtypes.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index ce613476854110..31d71031bca12c 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1805,8 +1805,18 @@ expression support in the :mod:`re` module). interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between characters - which is the length of the string plus one. - + which is the length of the string plus one. For example:: + + >>> 'spam, spam, spam'.count('spam') + 3 + >>> 'spam, spam, spam'.count('spam', 5) + 2 + >>> 'spam, spam, spam'.count('spam', 5, 10) + 1 + >>> 'spam, spam, spam'.count('eggs') + 0 + >>> 'spam, spam, spam'.count('') + 17 .. method:: str.encode(encoding="utf-8", errors="strict") From 99a9ab1c64dce26c0f2dce7621b4d7f75da69856 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 23 May 2025 11:17:18 +0100 Subject: [PATCH 315/989] Further improves Advanced installation docs for PyManager (GH-134541) --- Doc/using/windows.rst | 47 +++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index cd22148b531b84..9628da3d2f6b12 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -504,6 +504,14 @@ configuration option. installing and uninstalling. +.. _Add-AppxPackage: https://learn.microsoft.com/powershell/module/appx/add-appxpackage + +.. _Remove-AppxPackage: https://learn.microsoft.com/powershell/module/appx/remove-appxpackage + +.. _Add-AppxProvisionedPackage: https://learn.microsoft.com/powershell/module/dism/add-appxprovisionedpackage + +.. _PackageManager: https://learn.microsoft.com/uwp/api/windows.management.deployment.packagemanager + .. _pymanager-advancedinstall: Advanced Installation @@ -559,12 +567,10 @@ downloaded MSIX can be installed by launching or using the commands below. .. code-block:: powershell - $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --disable-interactivity + $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --accept-source-agreements To programmatically install or uninstall an MSIX using only PowerShell, the -`Add-AppxPackage `_ -and `Remove-AppxPackage `_ -PowerShell cmdlets are recommended: +`Add-AppxPackage`_ and `Remove-AppxPackage`_ PowerShell cmdlets are recommended: .. code-block:: powershell @@ -572,18 +578,33 @@ PowerShell cmdlets are recommended: ... $> Get-AppxPackage PythonSoftwareFoundation.PythonManager | Remove-AppxPackage -The native APIs for package management may be found on the Windows -`PackageManager `_ -class. The :func:`!AddPackageAsync` method installs for the current user, or use -:func:`!StagePackageAsync` followed by :func:`!ProvisionPackageForAllUsersAsync` -to install the Python install manager for all users from the MSIX package. Users -will still need to install their own copies of Python itself, as there is no way -to trigger those installs without being a logged in user. +The latest release can be downloaded and installed by Windows by passing the +AppInstaller file to the Add-AppxPackage command. This installs using the MSIX +on python.org, and is only recommended for cases where installing via the Store +(interactively or using WinGet) is not possible. + +.. code-block:: powershell + + $> Add-AppxPackage -AppInstallerFile https://www.python.org/ftp/python/pymanager/pymanager.appinstaller + +Other tools and APIs may also be used to provision an MSIX package for all users +on a machine, but Python does not consider this a supported scenario. We suggest +looking into the PowerShell `Add-AppxProvisionedPackage`_ cmdlet, the native +Windows `PackageManager`_ class, or the documentation and support for your +deployment tool. + +Regardless of the install method, users will still need to install their own +copies of Python itself, as there is no way to trigger those installs without +being a logged in user. When using the MSIX, the latest version of Python will +be available for all users to install without network access. Note that the MSIX downloadable from the Store and from the Python website are subtly different and cannot be installed at the same time. Wherever possible, -we suggest using the above commands to download the package from the Store to -reduce the risk of setting up conflicting installs. +we suggest using the above WinGet commands to download the package from the +Store to reduce the risk of setting up conflicting installs. There are no +licensing restrictions on the Python install manager that would prevent using +the Store package in this way. + .. _pymanager-admin-config: From b8f55266bf873bc101c3aab7aa868b909ecbcb99 Mon Sep 17 00:00:00 2001 From: Collin Funk Date: Fri, 23 May 2025 04:11:04 -0700 Subject: [PATCH 316/989] gh-134486: Fix missing alloca() symbol in _ctypes on NetBSD (#134487) Previously the module would fail to load because the `alloca()` symbol was undefined. Now we check for GCC/Clang builtins for systems who do not define `alloca()` in headers. --- .../2025-05-21-22-13-30.gh-issue-134486.yvdL6f.rst | 3 +++ Modules/_ctypes/callbacks.c | 9 --------- Modules/_ctypes/callproc.c | 8 -------- Modules/_ctypes/ctypes.h | 14 +++++++++++++- 4 files changed, 16 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-21-22-13-30.gh-issue-134486.yvdL6f.rst diff --git a/Misc/NEWS.d/next/Build/2025-05-21-22-13-30.gh-issue-134486.yvdL6f.rst b/Misc/NEWS.d/next/Build/2025-05-21-22-13-30.gh-issue-134486.yvdL6f.rst new file mode 100644 index 00000000000000..2754e61f0182b5 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-21-22-13-30.gh-issue-134486.yvdL6f.rst @@ -0,0 +1,3 @@ +The :mod:`ctypes` module now performs a more portable test for the +definition of :manpage:`alloca(3)`, fixing a compilation failure on +NetBSD. diff --git a/Modules/_ctypes/callbacks.c b/Modules/_ctypes/callbacks.c index ec113e41d16323..fd508ae61f2e04 100644 --- a/Modules/_ctypes/callbacks.c +++ b/Modules/_ctypes/callbacks.c @@ -11,18 +11,9 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_runtime.h" // _Py_ID() -#ifdef MS_WIN32 -# include -#endif - #include #include "ctypes.h" -#ifdef HAVE_ALLOCA_H -/* AIX needs alloca.h for alloca() */ -#include -#endif - /**************************************************************/ static int diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 9f5ad9f57b75e2..65a0f14e2b076c 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -77,16 +77,8 @@ module _ctypes #include #endif -#ifdef MS_WIN32 -#include -#endif - #include #include "ctypes.h" -#ifdef HAVE_ALLOCA_H -/* AIX needs alloca.h for alloca() */ -#include -#endif #ifdef _Py_MEMORY_SANITIZER #include diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 2d859ed63e1758..6a45c11e61af5c 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -1,5 +1,17 @@ -#if defined (__SVR4) && defined (__sun) +/* Get a definition of alloca(). */ +#if (defined (__SVR4) && defined (__sun)) || defined(HAVE_ALLOCA_H) # include +#elif defined(MS_WIN32) +# include +#endif + +/* If the system does not define alloca(), we have to hope for a compiler builtin. */ +#ifndef alloca +# if defined __GNUC__ || (__clang_major__ >= 4) +# define alloca __builtin_alloca +# else +# error "Could not define alloca() on your platform." +# endif #endif #include From 71dea74865a4f0e065e3dfc1acc70f7eee4f659e Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Fri, 23 May 2025 08:48:45 -0400 Subject: [PATCH 317/989] gh-131798: Small improvements to `remove_unneeded_uops` (GH-134554) Improve remove_unneeded_uops --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 851e1efa0497af..6a7df233819b9c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -558,6 +558,7 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { const bool op_skip[MAX_UOP_ID + 1] = { [_NOP] = true, [_CHECK_VALIDITY] = true, + [_CHECK_PERIODIC] = true, [_SET_IP] = true, }; @@ -617,7 +618,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) while (op_skip[last->opcode]) { last--; } - if (op_without_push[last->opcode]) { + if (op_without_push[last->opcode] && op_without_pop[opcode]) { last->opcode = op_without_push[last->opcode]; opcode = buffer[pc].opcode = op_without_pop[opcode]; if (op_without_pop[last->opcode]) { From fc0c9c24121c1a62b25b79062286f976699b59e9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 23 May 2025 19:58:34 +0300 Subject: [PATCH 318/989] gh-133454: Reduce the number of threads in test_racing_getbuf_and_releasebuf (GH-133458) The original reproducer only used 10 threads. --- Lib/test/test_memoryview.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 61b068c630c7ce..64f440f180bbf0 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -743,19 +743,21 @@ def test_racing_getbuf_and_releasebuf(self): from multiprocessing.managers import SharedMemoryManager except ImportError: self.skipTest("Test requires multiprocessing") - from threading import Thread + from threading import Thread, Event - n = 100 + start = Event() with SharedMemoryManager() as smm: obj = smm.ShareableList(range(100)) - threads = [] - for _ in range(n): - # Issue gh-127085, the `ShareableList.count` is just a convenient way to mess the `exports` - # counter of `memoryview`, this issue has no direct relation with `ShareableList`. - threads.append(Thread(target=obj.count, args=(1,))) - + def test(): + # Issue gh-127085, the `ShareableList.count` is just a + # convenient way to mess the `exports` counter of `memoryview`, + # this issue has no direct relation with `ShareableList`. + start.wait(support.SHORT_TIMEOUT) + for i in range(10): + obj.count(1) + threads = [Thread(target=test) for _ in range(10)] with threading_helper.start_threads(threads): - pass + start.set() del obj From 77eade39f972a4f3d8e9fec00288779f35ceee21 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 23 May 2025 19:59:10 +0300 Subject: [PATCH 319/989] gh-134578: Mark more slow tests (GH-134579) --- Lib/test/test_ast/test_ast.py | 1 + Lib/test/test_capi/test_object.py | 1 + Lib/test/test_collections.py | 2 ++ Lib/test/test_json/test_recursion.py | 1 + Lib/test/test_statistics.py | 1 + 5 files changed, 6 insertions(+) diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 1479a8eafd62ec..46745cfa8f8325 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -3292,6 +3292,7 @@ def check_output(self, source, expect, *flags): expect = self.text_normalize(expect) self.assertEqual(res, expect) + @support.requires_resource('cpu') def test_invocation(self): # test various combinations of parameters base_flags = ( diff --git a/Lib/test/test_capi/test_object.py b/Lib/test/test_capi/test_object.py index cd772120bdee2b..d4056727d07fbf 100644 --- a/Lib/test/test_capi/test_object.py +++ b/Lib/test/test_capi/test_object.py @@ -221,6 +221,7 @@ def test_decref_freed_object(self): """ self.check_negative_refcount(code) + @support.requires_resource('cpu') def test_decref_delayed(self): # gh-130519: Test that _PyObject_XDecRefDelayed() and QSBR code path # handles destructors that are possibly re-entrant or trigger a GC. diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 1e93530398be79..d9d61e5c2053e3 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -542,6 +542,8 @@ def test_odd_sizes(self): self.assertEqual(Dot(1)._replace(d=999), (999,)) self.assertEqual(Dot(1)._fields, ('d',)) + @support.requires_resource('cpu') + def test_large_size(self): n = support.exceeds_recursion_limit() names = list(set(''.join([choice(string.ascii_letters) for j in range(10)]) for i in range(n))) diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index 8f0e5e078ed0d4..5d7b56ff9ad285 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -86,6 +86,7 @@ def test_highly_nested_objects_decoding(self): @support.skip_wasi_stack_overflow() @support.skip_emscripten_stack_overflow() + @support.requires_resource('cpu') def test_highly_nested_objects_encoding(self): # See #12051 l, d = [], {} diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 5980f939185965..0dd619dd7c8ceb 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2346,6 +2346,7 @@ def test_mixed_int_and_float(self): class TestKDE(unittest.TestCase): + @support.requires_resource('cpu') def test_kde(self): kde = statistics.kde StatisticsError = statistics.StatisticsError From 393773ae8763202ecf7afff81c4d57bd37c62ff6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 23 May 2025 21:07:49 +0300 Subject: [PATCH 320/989] gh-134565: Use ExceptionGroup to handle multiple errors in unittest.doModuleCleanups() (GH-134566) --- Lib/test/test_unittest/test_result.py | 37 +++++++++++-- Lib/test/test_unittest/test_runner.py | 52 +++++++++++++++++-- Lib/unittest/case.py | 4 +- Lib/unittest/suite.py | 20 +++++-- ...-05-23-10-15-36.gh-issue-134565.zmb66C.rst | 3 ++ 5 files changed, 101 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-23-10-15-36.gh-issue-134565.zmb66C.rst diff --git a/Lib/test/test_unittest/test_result.py b/Lib/test/test_unittest/test_result.py index 9ac4c52449c2ff..3f44e617303f81 100644 --- a/Lib/test/test_unittest/test_result.py +++ b/Lib/test/test_unittest/test_result.py @@ -1282,14 +1282,22 @@ def setUpModule(): suite(result) expected_out = '\nStdout:\ndo cleanup2\ndo cleanup1\n' self.assertEqual(stdout.getvalue(), expected_out) - self.assertEqual(len(result.errors), 1) + self.assertEqual(len(result.errors), 2) description = 'tearDownModule (Module)' test_case, formatted_exc = result.errors[0] self.assertEqual(test_case.description, description) self.assertIn('ValueError: bad cleanup2', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) self.assertNotIn('TypeError', formatted_exc) self.assertIn(expected_out, formatted_exc) + test_case, formatted_exc = result.errors[1] + self.assertEqual(test_case.description, description) + self.assertIn('TypeError: bad cleanup1', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) + self.assertNotIn('ValueError', formatted_exc) + self.assertIn(expected_out, formatted_exc) + def testBufferSetUpModule_DoModuleCleanups(self): with captured_stdout() as stdout: result = unittest.TestResult() @@ -1313,22 +1321,34 @@ def setUpModule(): suite(result) expected_out = '\nStdout:\nset up module\ndo cleanup2\ndo cleanup1\n' self.assertEqual(stdout.getvalue(), expected_out) - self.assertEqual(len(result.errors), 2) + self.assertEqual(len(result.errors), 3) description = 'setUpModule (Module)' test_case, formatted_exc = result.errors[0] self.assertEqual(test_case.description, description) self.assertIn('ZeroDivisionError: division by zero', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) self.assertNotIn('ValueError', formatted_exc) self.assertNotIn('TypeError', formatted_exc) self.assertIn('\nStdout:\nset up module\n', formatted_exc) + test_case, formatted_exc = result.errors[1] self.assertIn(expected_out, formatted_exc) self.assertEqual(test_case.description, description) self.assertIn('ValueError: bad cleanup2', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) self.assertNotIn('ZeroDivisionError', formatted_exc) self.assertNotIn('TypeError', formatted_exc) self.assertIn(expected_out, formatted_exc) + test_case, formatted_exc = result.errors[2] + self.assertIn(expected_out, formatted_exc) + self.assertEqual(test_case.description, description) + self.assertIn('TypeError: bad cleanup1', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) + self.assertNotIn('ZeroDivisionError', formatted_exc) + self.assertNotIn('ValueError', formatted_exc) + self.assertIn(expected_out, formatted_exc) + def testBufferTearDownModule_DoModuleCleanups(self): with captured_stdout() as stdout: result = unittest.TestResult() @@ -1355,21 +1375,32 @@ def tearDownModule(): suite(result) expected_out = '\nStdout:\ntear down module\ndo cleanup2\ndo cleanup1\n' self.assertEqual(stdout.getvalue(), expected_out) - self.assertEqual(len(result.errors), 2) + self.assertEqual(len(result.errors), 3) description = 'tearDownModule (Module)' test_case, formatted_exc = result.errors[0] self.assertEqual(test_case.description, description) self.assertIn('ZeroDivisionError: division by zero', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) self.assertNotIn('ValueError', formatted_exc) self.assertNotIn('TypeError', formatted_exc) self.assertIn('\nStdout:\ntear down module\n', formatted_exc) + test_case, formatted_exc = result.errors[1] self.assertEqual(test_case.description, description) self.assertIn('ValueError: bad cleanup2', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) self.assertNotIn('ZeroDivisionError', formatted_exc) self.assertNotIn('TypeError', formatted_exc) self.assertIn(expected_out, formatted_exc) + test_case, formatted_exc = result.errors[2] + self.assertEqual(test_case.description, description) + self.assertIn('TypeError: bad cleanup1', formatted_exc) + self.assertNotIn('ExceptionGroup', formatted_exc) + self.assertNotIn('ZeroDivisionError', formatted_exc) + self.assertNotIn('ValueError', formatted_exc) + self.assertIn(expected_out, formatted_exc) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 4d3cfd60b8d9c3..a47e2ebb59da02 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -13,6 +13,7 @@ LoggingResult, ResultWithNoStartTestRunStopTestRun, ) +from test.support.testcase import ExceptionIsLikeMixin def resultFactory(*_): @@ -604,7 +605,7 @@ class EmptyTest(unittest.TestCase): @support.force_not_colorized_test_class -class TestModuleCleanUp(unittest.TestCase): +class TestModuleCleanUp(ExceptionIsLikeMixin, unittest.TestCase): def test_add_and_do_ModuleCleanup(self): module_cleanups = [] @@ -646,11 +647,50 @@ class Module(object): [(module_cleanup_good, (1, 2, 3), dict(four='hello', five='goodbye')), (module_cleanup_bad, (), {})]) - with self.assertRaises(CustomError) as e: + with self.assertRaises(Exception) as e: unittest.case.doModuleCleanups() - self.assertEqual(str(e.exception), 'CleanUpExc') + self.assertExceptionIsLike(e.exception, + ExceptionGroup('module cleanup failed', + [CustomError('CleanUpExc')])) self.assertEqual(unittest.case._module_cleanups, []) + def test_doModuleCleanup_with_multiple_errors_in_addModuleCleanup(self): + def module_cleanup_bad1(): + raise TypeError('CleanUpExc1') + + def module_cleanup_bad2(): + raise ValueError('CleanUpExc2') + + class Module: + unittest.addModuleCleanup(module_cleanup_bad1) + unittest.addModuleCleanup(module_cleanup_bad2) + with self.assertRaises(ExceptionGroup) as e: + unittest.case.doModuleCleanups() + self.assertExceptionIsLike(e.exception, + ExceptionGroup('module cleanup failed', [ + ValueError('CleanUpExc2'), + TypeError('CleanUpExc1'), + ])) + + def test_doModuleCleanup_with_exception_group_in_addModuleCleanup(self): + def module_cleanup_bad(): + raise ExceptionGroup('CleanUpExc', [ + ValueError('CleanUpExc2'), + TypeError('CleanUpExc1'), + ]) + + class Module: + unittest.addModuleCleanup(module_cleanup_bad) + with self.assertRaises(ExceptionGroup) as e: + unittest.case.doModuleCleanups() + self.assertExceptionIsLike(e.exception, + ExceptionGroup('module cleanup failed', [ + ExceptionGroup('CleanUpExc', [ + ValueError('CleanUpExc2'), + TypeError('CleanUpExc1'), + ]), + ])) + def test_addModuleCleanup_arg_errors(self): cleanups = [] def cleanup(*args, **kwargs): @@ -871,9 +911,11 @@ def tearDownClass(cls): ordering = [] blowUp = True suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestableTest) - with self.assertRaises(CustomError) as cm: + with self.assertRaises(Exception) as cm: suite.debug() - self.assertEqual(str(cm.exception), 'CleanUpExc') + self.assertExceptionIsLike(cm.exception, + ExceptionGroup('module cleanup failed', + [CustomError('CleanUpExc')])) self.assertEqual(ordering, ['setUpModule', 'setUpClass', 'test', 'tearDownClass', 'tearDownModule', 'cleanup_exc']) self.assertEqual(unittest.case._module_cleanups, []) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 884fc1b21f64d8..db10de68e4ac73 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -149,9 +149,7 @@ def doModuleCleanups(): except Exception as exc: exceptions.append(exc) if exceptions: - # Swallows all but first exception. If a multi-exception handler - # gets written we should use that here instead. - raise exceptions[0] + raise ExceptionGroup('module cleanup failed', exceptions) def skip(reason): diff --git a/Lib/unittest/suite.py b/Lib/unittest/suite.py index 6f45b6fe5f6039..ae9ca2d615de06 100644 --- a/Lib/unittest/suite.py +++ b/Lib/unittest/suite.py @@ -223,6 +223,11 @@ def _handleModuleFixture(self, test, result): if result._moduleSetUpFailed: try: case.doModuleCleanups() + except ExceptionGroup as eg: + for e in eg.exceptions: + self._createClassOrModuleLevelException(result, e, + 'setUpModule', + currentModule) except Exception as e: self._createClassOrModuleLevelException(result, e, 'setUpModule', @@ -235,15 +240,15 @@ def _createClassOrModuleLevelException(self, result, exc, method_name, errorName = f'{method_name} ({parent})' self._addClassOrModuleLevelException(result, exc, errorName, info) - def _addClassOrModuleLevelException(self, result, exception, errorName, + def _addClassOrModuleLevelException(self, result, exc, errorName, info=None): error = _ErrorHolder(errorName) addSkip = getattr(result, 'addSkip', None) - if addSkip is not None and isinstance(exception, case.SkipTest): - addSkip(error, str(exception)) + if addSkip is not None and isinstance(exc, case.SkipTest): + addSkip(error, str(exc)) else: if not info: - result.addError(error, sys.exc_info()) + result.addError(error, (type(exc), exc, exc.__traceback__)) else: result.addError(error, info) @@ -273,6 +278,13 @@ def _handleModuleTearDown(self, result): previousModule) try: case.doModuleCleanups() + except ExceptionGroup as eg: + if isinstance(result, _DebugResult): + raise + for e in eg.exceptions: + self._createClassOrModuleLevelException(result, e, + 'tearDownModule', + previousModule) except Exception as e: if isinstance(result, _DebugResult): raise diff --git a/Misc/NEWS.d/next/Library/2025-05-23-10-15-36.gh-issue-134565.zmb66C.rst b/Misc/NEWS.d/next/Library/2025-05-23-10-15-36.gh-issue-134565.zmb66C.rst new file mode 100644 index 00000000000000..17d2b23b62d3c3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-23-10-15-36.gh-issue-134565.zmb66C.rst @@ -0,0 +1,3 @@ +:func:`unittest.doModuleCleanups` no longer swallows all but first exception +raised in the cleanup code, but raises a :exc:`ExceptionGroup` if multiple +errors occurred. From 05a19b5e56894fd1e63aff6b38fb23ad7c7b3047 Mon Sep 17 00:00:00 2001 From: Daniel Li Date: Fri, 23 May 2025 14:45:45 -0400 Subject: [PATCH 321/989] gh-120170: Exclude __mp_main__ in C version of whichmodule() (#120171) Co-authored-by: Lysandros Nikolaou --- .../Library/2024-06-06-17-49-07.gh-issue-120170.DUxhmT.rst | 3 +++ Modules/_pickle.c | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-06-17-49-07.gh-issue-120170.DUxhmT.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-06-17-49-07.gh-issue-120170.DUxhmT.rst b/Misc/NEWS.d/next/Library/2024-06-06-17-49-07.gh-issue-120170.DUxhmT.rst new file mode 100644 index 00000000000000..ce7d874aa20375 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-06-17-49-07.gh-issue-120170.DUxhmT.rst @@ -0,0 +1,3 @@ +Fix an issue in the :mod:`!_pickle` extension module in which importing +:mod:`multiprocessing` could change how pickle identifies which module an +object belongs to, potentially breaking the unpickling of those objects. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..29ef0cb0c2e088 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1879,6 +1879,10 @@ _checkmodule(PyObject *module_name, PyObject *module, _PyUnicode_EqualToASCIIString(module_name, "__main__")) { return -1; } + if (PyUnicode_Check(module_name) && + _PyUnicode_EqualToASCIIString(module_name, "__mp_main__")) { + return -1; + } PyObject *candidate = getattribute(module, dotted_path, 0); if (candidate == NULL) { From 9a2346df861f26d5f8d054ad2f9c37134dee3822 Mon Sep 17 00:00:00 2001 From: "Jiucheng(Oliver)" Date: Fri, 23 May 2025 15:22:14 -0400 Subject: [PATCH 322/989] gh-134381: Fix RuntimeError when starting not-yet started Thread after fork (gh-134514) --- Lib/test/_test_multiprocessing.py | 22 +++++++++++++++++++ ...-05-22-14-48-19.gh-issue-134381.2BXhth.rst | 1 + Modules/_threadmodule.c | 6 +++++ 3 files changed, 29 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-14-48-19.gh-issue-134381.2BXhth.rst diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 6a20a1eb03e32b..75f31d858d3306 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -6844,6 +6844,28 @@ def f(x): return x*x self.assertEqual("332833500", out.decode('utf-8').strip()) self.assertFalse(err, msg=err.decode('utf-8')) + def test_forked_thread_not_started(self): + # gh-134381: Ensure that a thread that has not been started yet in + # the parent process can be started within a forked child process. + + if multiprocessing.get_start_method() != "fork": + self.skipTest("fork specific test") + + q = multiprocessing.Queue() + t = threading.Thread(target=lambda: q.put("done"), daemon=True) + + def child(): + t.start() + t.join() + + p = multiprocessing.Process(target=child) + p.start() + p.join(support.SHORT_TIMEOUT) + + self.assertEqual(p.exitcode, 0) + self.assertEqual(q.get_nowait(), "done") + close_queue(q) + # # Mixins diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-14-48-19.gh-issue-134381.2BXhth.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-14-48-19.gh-issue-134381.2BXhth.rst new file mode 100644 index 00000000000000..aa8900296ae2fc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-14-48-19.gh-issue-134381.2BXhth.rst @@ -0,0 +1 @@ +Fix :exc:`RuntimeError` when using a not-started :class:`threading.Thread` after calling :func:`os.fork` diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 74b972b201a546..cc83be4b5ff311 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -296,6 +296,12 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state) continue; } + // Keep handles for threads that have not been started yet. They are + // safe to start in the child process. + if (handle->state == THREAD_HANDLE_NOT_STARTED) { + continue; + } + // Mark all threads as done. Any attempts to join or detach the // underlying OS thread (if any) could crash. We are the only thread; // it's safe to set this non-atomically. From 8a793c4a365d06a7264887698ccd7d6ba6aba9f2 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 May 2025 14:04:20 -0600 Subject: [PATCH 323/989] gh-134557: Revert "gh-132775: Use _PyCode GetScriptXIData()" (gh-134599) This reverts commit 09e72cf091d, AKA gh-134511. We are reverting due to refleaks on free-threaded builds. --- Lib/test/support/interpreters/channels.py | 2 +- Lib/test/test__interpreters.py | 25 +- Lib/test/test_interpreters/test_api.py | 23 +- Modules/_interpretersmodule.c | 294 ++++++++++++++++------ 4 files changed, 240 insertions(+), 104 deletions(-) diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/interpreters/channels.py index 3b6e0f0effd969..7a2bd7d63f808f 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/interpreters/channels.py @@ -69,7 +69,7 @@ def list_all(): if not hasattr(send, '_unboundop'): send._set_unbound(unboundop) else: - assert send._unbound[0] == unboundop + assert send._unbound[0] == op channels.append(chan) return channels diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index ad3ebbfdff64a7..63fdaad8de7ef5 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -474,15 +474,13 @@ def setUp(self): def test_signatures(self): # See https://github.com/python/cpython/issues/126654 - msg = r'_interpreters.exec\(\) argument 3 must be dict, not int' + msg = "expected 'shared' to be a dict" with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', 1) with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', shared=1) - msg = r'_interpreters.run_string\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_string(self.id, 'a', shared=1) - msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_func(self.id, lambda: None, shared=1) @@ -954,8 +952,7 @@ def test_invalid_syntax(self): """) with self.subTest('script'): - with self.assertRaises(SyntaxError): - _interpreters.run_string(self.id, script) + self.assert_run_failed(SyntaxError, script) with self.subTest('module'): modname = 'spam_spam_spam' @@ -1022,19 +1019,12 @@ def script(): with open(w, 'w', encoding="utf-8") as spipe: with contextlib.redirect_stdout(spipe): print('it worked!', end='') - failed = None def f(): - nonlocal failed - try: - _interpreters.set___main___attrs(self.id, dict(w=w)) - _interpreters.run_func(self.id, script) - except Exception as exc: - failed = exc + _interpreters.set___main___attrs(self.id, dict(w=w)) + _interpreters.run_func(self.id, script) t = threading.Thread(target=f) t.start() t.join() - if failed: - raise Exception from failed with open(r, encoding="utf-8") as outfile: out = outfile.read() @@ -1063,16 +1053,19 @@ def test_closure(self): spam = True def script(): assert spam - with self.assertRaises(ValueError): + + with self.assertRaises(TypeError): _interpreters.run_func(self.id, script) + # XXX This hasn't been fixed yet. + @unittest.expectedFailure def test_return_value(self): def script(): return 'spam' with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) -# @unittest.skip("we're not quite there yet") + @unittest.skip("we're not quite there yet") def test_args(self): with self.subTest('args'): def script(a, b=0): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 165949167ceba8..1e2d572b1cbb81 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -839,16 +839,9 @@ def test_bad_script(self): interp.exec(10) def test_bytes_for_script(self): - r, w = self.pipe() - RAN = b'R' - DONE = b'D' interp = interpreters.create() - interp.exec(f"""if True: - import os - os.write({w}, {RAN!r}) - """) - os.write(w, DONE) - self.assertEqual(os.read(r, 1), RAN) + with self.assertRaises(TypeError): + interp.exec(b'print("spam")') def test_with_background_threads_still_running(self): r_interp, w_interp = self.pipe() @@ -1017,6 +1010,8 @@ def test_call(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1041,8 +1036,6 @@ def test_call(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): with self.assertRaises(Exception): @@ -1058,6 +1051,8 @@ def test_call_in_thread(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1084,8 +1079,6 @@ def test_call_in_thread(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): if args or kwargs: @@ -1625,8 +1618,8 @@ def test_exec(self): def test_call(self): with self.subTest('no args'): interpid = _interpreters.create() - with self.assertRaises(ValueError): - _interpreters.call(interpid, call_func_return_shareable) + exc = _interpreters.call(interpid, call_func_return_shareable) + self.assertIs(exc, None) with self.subTest('uncaught exception'): interpid = _interpreters.create() diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 376517ab92360f..f4807fd214b19e 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -9,6 +9,7 @@ #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() +#include "pycore_function.h" // _PyFunction_VerifyStateless() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -360,6 +361,81 @@ _get_current_xibufferview_type(void) } +/* Python code **************************************************************/ + +static const char * +check_code_str(PyUnicodeObject *text) +{ + assert(text != NULL); + if (PyUnicode_GET_LENGTH(text) == 0) { + return "too short"; + } + + // XXX Verify that it parses? + + return NULL; +} + +#ifndef NDEBUG +static int +code_has_args(PyCodeObject *code) +{ + assert(code != NULL); + return (code->co_argcount > 0 + || code->co_posonlyargcount > 0 + || code->co_kwonlyargcount > 0 + || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)); +} +#endif + +#define RUN_TEXT 1 +#define RUN_CODE 2 + +static const char * +get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) +{ + const char *codestr = NULL; + Py_ssize_t len = -1; + PyObject *bytes_obj = NULL; + int flags = 0; + + if (PyUnicode_Check(arg)) { + assert(PyUnicode_Check(arg) + && (check_code_str((PyUnicodeObject *)arg) == NULL)); + codestr = PyUnicode_AsUTF8AndSize(arg, &len); + if (codestr == NULL) { + return NULL; + } + if (strlen(codestr) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, + "source code string cannot contain null bytes"); + return NULL; + } + flags = RUN_TEXT; + } + else { + assert(PyCode_Check(arg)); + assert(_PyCode_VerifyStateless( + PyThreadState_Get(), (PyCodeObject *)arg, NULL, NULL, NULL) == 0); + assert(!code_has_args((PyCodeObject *)arg)); + flags = RUN_CODE; + + // Serialize the code object. + bytes_obj = PyMarshal_WriteObjectToString(arg, Py_MARSHAL_VERSION); + if (bytes_obj == NULL) { + return NULL; + } + codestr = PyBytes_AS_STRING(bytes_obj); + len = PyBytes_GET_SIZE(bytes_obj); + } + + *flags_p = flags; + *bytes_p = bytes_obj; + *len_p = len; + return codestr; +} + + /* interpreter-specific code ************************************************/ static int @@ -423,14 +499,22 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) static int -_run_script(_PyXIData_t *script, PyObject *ns) +_run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) { - PyObject *code = _PyXIData_NewObject(script); - if (code == NULL) { - return -1; + PyObject *result = NULL; + if (flags & RUN_TEXT) { + result = PyRun_StringFlags(codestr, Py_file_input, ns, ns, NULL); + } + else if (flags & RUN_CODE) { + PyObject *code = PyMarshal_ReadObjectFromString(codestr, codestrlen); + if (code != NULL) { + result = PyEval_EvalCode(code, ns, ns); + Py_DECREF(code); + } + } + else { + Py_UNREACHABLE(); } - PyObject *result = PyEval_EvalCode(code, ns, ns); - Py_DECREF(code); if (result == NULL) { return -1; } @@ -439,11 +523,12 @@ _run_script(_PyXIData_t *script, PyObject *ns) } static int -_exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, - _PyXIData_t *script, PyObject *shareables, +_run_in_interpreter(PyInterpreterState *interp, + const char *codestr, Py_ssize_t codestrlen, + PyObject *shareables, int flags, PyObject **p_excinfo) { - assert(!_PyErr_Occurred(tstate)); + assert(!PyErr_Occurred()); _PyXI_session *session = _PyXI_NewSession(); if (session == NULL) { return -1; @@ -451,7 +536,7 @@ _exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, // Prep and switch interpreters. if (_PyXI_Enter(session, interp, shareables) < 0) { - if (_PyErr_Occurred(tstate)) { + if (PyErr_Occurred()) { // If an error occured at this step, it means that interp // was not prepared and switched. _PyXI_FreeSession(session); @@ -473,7 +558,7 @@ _exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, if (mainns == NULL) { goto finally; } - res = _run_script(script, mainns); + res = _run_script(mainns, codestr, codestrlen, flags); finally: // Clean up and switch back. @@ -867,23 +952,104 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static void -unwrap_not_shareable(PyThreadState *tstate) +static PyUnicodeObject * +convert_script_arg(PyThreadState *tstate, + PyObject *arg, const char *fname, const char *displayname, + const char *expected) { - PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); - if (!_PyErr_ExceptionMatches(tstate, exctype)) { - return; + PyUnicodeObject *str = NULL; + if (PyUnicode_CheckExact(arg)) { + str = (PyUnicodeObject *)Py_NewRef(arg); } - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyObject *cause = PyException_GetCause(exc); - if (cause != NULL) { - Py_DECREF(exc); - exc = cause; + else if (PyUnicode_Check(arg)) { + // XXX str = PyUnicode_FromObject(arg); + str = (PyUnicodeObject *)Py_NewRef(arg); } else { - assert(PyException_GetContext(exc) == NULL); + _PyArg_BadArgument(fname, displayname, expected, arg); + return NULL; } + + const char *err = check_code_str(str); + if (err != NULL) { + Py_DECREF(str); + _PyErr_Format(tstate, PyExc_ValueError, + "%.200s(): bad script text (%s)", fname, err); + return NULL; + } + + return str; +} + +static PyCodeObject * +convert_code_arg(PyThreadState *tstate, + PyObject *arg, const char *fname, const char *displayname, + const char *expected) +{ + PyObject *cause; + PyCodeObject *code = NULL; + if (PyFunction_Check(arg)) { + // For now we allow globals, so we can't use + // _PyFunction_VerifyStateless(). + PyObject *codeobj = PyFunction_GetCode(arg); + if (_PyCode_VerifyStateless( + tstate, (PyCodeObject *)codeobj, NULL, NULL, NULL) < 0) { + goto chained; + } + code = (PyCodeObject *)Py_NewRef(codeobj); + } + else if (PyCode_Check(arg)) { + if (_PyCode_VerifyStateless( + tstate, (PyCodeObject *)arg, NULL, NULL, NULL) < 0) { + goto chained; + } + code = (PyCodeObject *)Py_NewRef(arg); + } + else { + _PyArg_BadArgument(fname, displayname, expected, arg); + return NULL; + } + + return code; + +chained: + cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + _PyArg_BadArgument(fname, displayname, expected, arg); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyException_SetCause(exc, cause); _PyErr_SetRaisedException(tstate, exc); + return NULL; +} + +static int +_interp_exec(PyObject *self, PyInterpreterState *interp, + PyObject *code_arg, PyObject *shared_arg, PyObject **p_excinfo) +{ + if (shared_arg != NULL && !PyDict_CheckExact(shared_arg)) { + PyErr_SetString(PyExc_TypeError, "expected 'shared' to be a dict"); + return -1; + } + + // Extract code. + Py_ssize_t codestrlen = -1; + PyObject *bytes_obj = NULL; + int flags = 0; + const char *codestr = get_code_str(code_arg, + &codestrlen, &bytes_obj, &flags); + if (codestr == NULL) { + return -1; + } + + // Run the code in the interpreter. + int res = _run_in_interpreter(interp, codestr, codestrlen, + shared_arg, flags, p_excinfo); + Py_XDECREF(bytes_obj); + if (res < 0) { + return -1; + } + + return 0; } static PyObject * @@ -896,9 +1062,8 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O!$p:" FUNCNAME, kwlist, - &id, &code, &PyDict_Type, &shared, - &restricted)) + "OO|O$p:" FUNCNAME, kwlist, + &id, &code, &shared, &restricted)) { return NULL; } @@ -910,17 +1075,22 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - // We don't need the script to be "pure", which means it can use - // global variables. They will be resolved against __main__. - _PyXIData_t xidata = {0}; - if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { - unwrap_not_shareable(tstate); + const char *expected = "a string, a function, or a code object"; + if (PyUnicode_Check(code)) { + code = (PyObject *)convert_script_arg(tstate, code, FUNCNAME, + "argument 2", expected); + } + else { + code = (PyObject *)convert_code_arg(tstate, code, FUNCNAME, + "argument 2", expected); + } + if (code == NULL) { return NULL; } PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); - _PyXIData_Release(&xidata); + int res = _interp_exec(self, interp, code, shared, &excinfo); + Py_DECREF(code); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -956,9 +1126,8 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O!$p:" FUNCNAME, kwlist, - &id, &script, &PyDict_Type, &shared, - &restricted)) + "OU|O$p:" FUNCNAME, kwlist, + &id, &script, &shared, &restricted)) { return NULL; } @@ -970,20 +1139,15 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (PyFunction_Check(script) || PyCode_Check(script)) { - _PyArg_BadArgument(FUNCNAME, "argument 2", "a string", script); - return NULL; - } - - _PyXIData_t xidata = {0}; - if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { - unwrap_not_shareable(tstate); + script = (PyObject *)convert_script_arg(tstate, script, FUNCNAME, + "argument 2", "a string"); + if (script == NULL) { return NULL; } PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); - _PyXIData_Release(&xidata); + int res = _interp_exec(self, interp, script, shared, &excinfo); + Py_DECREF(script); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1009,9 +1173,8 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O!$p:" FUNCNAME, kwlist, - &id, &func, &PyDict_Type, &shared, - &restricted)) + "OO|O$p:" FUNCNAME, kwlist, + &id, &func, &shared, &restricted)) { return NULL; } @@ -1023,29 +1186,16 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - // We don't worry about checking globals. They will be resolved - // against __main__. - PyObject *code; - if (PyFunction_Check(func)) { - code = PyFunction_GET_CODE(func); - } - else if (PyCode_Check(func)) { - code = func; - } - else { - _PyArg_BadArgument(FUNCNAME, "argument 2", "a function", func); - return NULL; - } - - _PyXIData_t xidata = {0}; - if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { - unwrap_not_shareable(tstate); + PyCodeObject *code = convert_code_arg(tstate, func, FUNCNAME, + "argument 2", + "a function or a code object"); + if (code == NULL) { return NULL; } PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); - _PyXIData_Release(&xidata); + int res = _interp_exec(self, interp, (PyObject *)code, shared, &excinfo); + Py_DECREF(code); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1098,15 +1248,15 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - _PyXIData_t xidata = {0}; - if (_PyCode_GetPureScriptXIData(tstate, callable, &xidata) < 0) { - unwrap_not_shareable(tstate); + PyObject *code = (PyObject *)convert_code_arg(tstate, callable, FUNCNAME, + "argument 2", "a function"); + if (code == NULL) { return NULL; } PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, NULL, &excinfo); - _PyXIData_Release(&xidata); + int res = _interp_exec(self, interp, code, NULL, &excinfo); + Py_DECREF(code); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; From f478331f98930d94f7efc741f3bed4b693d5cec1 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Fri, 23 May 2025 14:51:41 -0700 Subject: [PATCH 324/989] gh-132983: Slightly tweak error messages for _zstd compressor/decompressor options dict (#134601) Slightly tweak error messages for options dict --- Lib/test/test_zstd.py | 5 +++-- Modules/_zstd/compressor.c | 11 +++++------ Modules/_zstd/decompressor.c | 7 +++---- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 084f8f24fc009c..34c7c721b1ad32 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -1424,11 +1424,12 @@ def test_init_bad_mode(self): with self.assertRaises(ValueError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "rw") - with self.assertRaisesRegex(TypeError, r"NOT be CompressionParameter"): + with self.assertRaisesRegex(TypeError, + r"NOT be a CompressionParameter"): ZstdFile(io.BytesIO(), 'rb', options={CompressionParameter.compression_level:5}) with self.assertRaisesRegex(TypeError, - r"NOT be DecompressionParameter"): + r"NOT be a DecompressionParameter"): ZstdFile(io.BytesIO(), 'wb', options={DecompressionParameter.window_log_max:21}) diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 8f934858ef784f..0f2967b60555a5 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -93,24 +93,23 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, /* Check key type */ if (Py_TYPE(key) == mod_state->DParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DecompressionParameter."); + "Key of compression options dict should " + "NOT be a DecompressionParameter attribute."); return -1; } int key_v = PyLong_AsInt(key); if (key_v == -1 && PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a CompressionParameter attribute."); + "Key of options dict should be either a " + "CompressionParameter attribute or an int."); return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? int value_v = PyLong_AsInt(value); if (value_v == -1 && PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, - "Value of option dict should be an int."); + "Value of options dict should be an int."); return -1; } diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index e299f73b071353..70848d98534fa5 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -112,7 +112,7 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, "Key of decompression options dict should " - "NOT be CompressionParameter."); + "NOT be a CompressionParameter attribute."); return -1; } @@ -120,12 +120,11 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) int key_v = PyLong_AsInt(key); if (key_v == -1 && PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a DecompressionParameter attribute."); + "Key of options dict should be either a " + "DecompressionParameter attribute or an int."); return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? int value_v = PyLong_AsInt(value); if (value_v == -1 && PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, From 973b8f69d31b4d86c37e0b7194a209f4f2efff06 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Fri, 23 May 2025 19:03:21 -0700 Subject: [PATCH 325/989] gh-132983: Make _zstd C code PEP 7 compliant (GH-134605) Make _zstd C code PEP 7 compliant --- Modules/_zstd/_zstdmodule.c | 118 +++++++++++++++--------------- Modules/_zstd/buffer.h | 9 ++- Modules/_zstd/clinic/zstddict.c.h | 14 +++- Modules/_zstd/compressor.c | 43 ++++++----- Modules/_zstd/decompressor.c | 30 ++++---- Modules/_zstd/zstddict.c | 32 +++++--- 6 files changed, 137 insertions(+), 109 deletions(-) diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 17d3bff1e98769..56ad999e5cd4e4 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -28,41 +28,42 @@ set_zstd_error(const _zstd_state* const state, char *msg; assert(ZSTD_isError(zstd_ret)); - switch (type) - { - case ERR_DECOMPRESS: - msg = "Unable to decompress Zstandard data: %s"; - break; - case ERR_COMPRESS: - msg = "Unable to compress Zstandard data: %s"; - break; - - case ERR_LOAD_D_DICT: - msg = "Unable to load Zstandard dictionary or prefix for decompression: %s"; - break; - case ERR_LOAD_C_DICT: - msg = "Unable to load Zstandard dictionary or prefix for compression: %s"; - break; - - case ERR_GET_C_BOUNDS: - msg = "Unable to get zstd compression parameter bounds: %s"; - break; - case ERR_GET_D_BOUNDS: - msg = "Unable to get zstd decompression parameter bounds: %s"; - break; - case ERR_SET_C_LEVEL: - msg = "Unable to set zstd compression level: %s"; - break; - - case ERR_TRAIN_DICT: - msg = "Unable to train the Zstandard dictionary: %s"; - break; - case ERR_FINALIZE_DICT: - msg = "Unable to finalize the Zstandard dictionary: %s"; - break; - - default: - Py_UNREACHABLE(); + switch (type) { + case ERR_DECOMPRESS: + msg = "Unable to decompress Zstandard data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress Zstandard data: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train the Zstandard dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize the Zstandard dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); } PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); } @@ -183,7 +184,7 @@ calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, chunks_number = Py_SIZE(samples_sizes); if ((size_t) chunks_number > UINT32_MAX) { PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + "The number of samples should be <= %u.", UINT32_MAX); return -1; } @@ -200,8 +201,8 @@ calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, (*chunk_sizes)[i] = PyLong_AsSize_t(size); if ((*chunk_sizes)[i] == (size_t)-1 && PyErr_Occurred()) { PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); + "Items in samples_sizes should be an int " + "object, with a value between 0 and %u.", SIZE_MAX); return -1; } sizes_sum += (*chunk_sizes)[i]; @@ -209,7 +210,8 @@ calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, if (sizes_sum != Py_SIZE(samples_bytes)) { PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + "The samples size tuple doesn't match the " + "concatenation's size."); return -1; } return chunks_number; @@ -242,15 +244,15 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } /* Check that the samples are valid and get their sizes */ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, &chunk_sizes); - if (chunks_number < 0) - { + if (chunks_number < 0) { goto error; } @@ -271,7 +273,7 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); goto error; } @@ -324,15 +326,15 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } /* Check that the samples are valid and get their sizes */ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, &chunk_sizes); - if (chunks_number < 0) - { + if (chunks_number < 0) { goto error; } @@ -355,14 +357,15 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_finalizeDictionary( PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(custom_dict_bytes), + Py_SIZE(custom_dict_bytes), PyBytes_AS_STRING(samples_bytes), chunk_sizes, (uint32_t)chunks_number, params); Py_END_ALLOW_THREADS /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); goto error; } @@ -402,7 +405,7 @@ _zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) if (is_compress) { bound = ZSTD_cParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); return NULL; } @@ -410,7 +413,7 @@ _zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) else { bound = ZSTD_dParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); return NULL; } @@ -435,9 +438,10 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) { size_t frame_size; - frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len); + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, + frame_buffer->len); if (ZSTD_isError(frame_size)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_Format(mod_state->ZstdError, "Error when finding the compressed size of a Zstandard frame. " "Ensure the frame_buffer argument starts from the " @@ -473,7 +477,7 @@ _zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_SetString(mod_state->ZstdError, "Error when getting information from the header of " "a Zstandard frame. Ensure the frame_buffer argument " @@ -508,7 +512,7 @@ _zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, PyObject *d_parameter_type) /*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { PyErr_SetString(PyExc_ValueError, @@ -568,7 +572,7 @@ do { \ Py_DECREF(v); \ } while (0) - _zstd_state* const mod_state = get_zstd_state(m); + _zstd_state* mod_state = get_zstd_state(m); /* Reusable objects & variables */ mod_state->CParameter_type = NULL; @@ -674,7 +678,7 @@ do { \ static int _zstd_traverse(PyObject *module, visitproc visit, void *arg) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); Py_VISIT(mod_state->ZstdDict_type); Py_VISIT(mod_state->ZstdCompressor_type); @@ -691,7 +695,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg) static int _zstd_clear(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); Py_CLEAR(mod_state->ZstdDict_type); Py_CLEAR(mod_state->ZstdCompressor_type); diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index bff3a81d8aa11c..4c885fa0d720fd 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -19,7 +19,8 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, /* Ensure .list was set to NULL */ assert(buffer->list == NULL); - Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, + &ob->dst); if (res < 0) { return -1; } @@ -34,8 +35,7 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, Return -1 on failure */ static inline int _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, - Py_ssize_t max_length, - Py_ssize_t init_size) + Py_ssize_t max_length, Py_ssize_t init_size) { Py_ssize_t block_size; @@ -50,7 +50,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, block_size = init_size; } - Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, + &ob->dst); if (res < 0) { return -1; } diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index aaa29e491bc1bb..810befdaf71f44 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -89,7 +89,9 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_digested_dict)\n" +"\n" "1. Some advanced compression parameters of compressor may be overridden\n" " by parameters of digested dictionary.\n" "2. ZstdDict has a digested dictionaries cache for each compression level.\n" @@ -123,7 +125,9 @@ _zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, "Load as an undigested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"\n" "1. The advanced compression parameters of compressor will not be overridden.\n" "2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" " multiple times, consider reusing a compressor object.\n" @@ -155,7 +159,9 @@ _zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, "Load as a prefix to compressor/decompressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_prefix)\n" +"\n" "1. Prefix is compatible with long distance matching, while dictionary is not.\n" "2. It only works for the first frame, then the compressor/decompressor will\n" " return to no prefix state.\n" @@ -183,4 +189,4 @@ _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=8692eabee4e0d1fe input=a9049054013a1b77]*/ +/*[clinic end generated code: output=47b12b5848b53ed8 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 0f2967b60555a5..31cb8c535c05a6 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -53,7 +53,7 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, const char *arg_name, const char* arg_type) { size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } @@ -63,8 +63,8 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, int level = PyLong_AsInt(level_or_options); if (level == -1 && PyErr_Occurred()) { PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between %d and %d.", - ZSTD_minCLevel(), ZSTD_maxCLevel()); + "Compression level should be an int value between " + "%d and %d.", ZSTD_minCLevel(), ZSTD_maxCLevel()); return -1; } @@ -138,7 +138,8 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, } return 0; } - PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type); + PyErr_Format(PyExc_TypeError, + "Invalid type for %s. Expected %s", arg_name, arg_type); return -1; } @@ -181,7 +182,7 @@ _get_CDict(ZstdDict *self, int compressionLevel) Py_END_ALLOW_THREADS if (cdict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Failed to create a ZSTD_CDict instance from " @@ -262,7 +263,7 @@ _zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, static int _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } @@ -295,9 +296,9 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) else if (ret > 0) { /* type == -1 may indicate an error. */ type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) { assert(type >= 0); zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); @@ -348,7 +349,7 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, /* Compression context */ self->cctx = ZSTD_createCCtx(); if (self->cctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_CCtx instance."); @@ -360,7 +361,8 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, self->last_mode = ZSTD_e_end; if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); + PyErr_SetString(PyExc_RuntimeError, + "Only one of level or options should be used."); goto error; } @@ -449,7 +451,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data, } if (_OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t) output_buffer_size) < 0) { + (Py_ssize_t) output_buffer_size) < 0) { goto error; } @@ -462,7 +464,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); } @@ -525,13 +527,16 @@ compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) while (1) { Py_BEGIN_ALLOW_THREADS do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, + ZSTD_e_continue); + } while (out.pos != out.size + && in.pos != in.size + && !ZSTD_isError(zstd_ret)); Py_END_ALLOW_THREADS /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); } @@ -678,12 +683,12 @@ static PyMethodDef ZstdCompressor_methods[] = { PyDoc_STRVAR(ZstdCompressor_last_mode_doc, "The last mode used to this compressor object, its value can be .CONTINUE,\n" ".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n" -"It can be used to get the current state of a compressor, such as, data flushed,\n" -"a frame ended."); +"It can be used to get the current state of a compressor, such as, data\n" +"flushed, or a frame ended."); static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), - Py_READONLY, ZstdCompressor_last_mode_doc}, + Py_READONLY, ZstdCompressor_last_mode_doc}, {NULL} }; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 70848d98534fa5..d084f0847c72dd 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -76,7 +76,7 @@ _get_DDict(ZstdDict *self) self->d_dict = ret; if (self->d_dict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Failed to create a ZSTD_DDict instance from " @@ -95,7 +95,7 @@ _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) size_t zstd_ret; PyObject *key, *value; Py_ssize_t pos; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } @@ -191,7 +191,7 @@ _zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, static int _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } @@ -224,9 +224,9 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) else if (ret > 0) { /* type == -1 may indicate an error. */ type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) { assert(type >= 0); zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); @@ -291,7 +291,7 @@ decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); } @@ -369,7 +369,8 @@ stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, /* Check .eof flag */ if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a Zstandard frame."); + PyErr_SetString(PyExc_EOFError, + "Already at the end of a Zstandard frame."); assert(ret == NULL); return NULL; } @@ -486,8 +487,8 @@ stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ - if (self->input_buffer != NULL && - self->input_buffer_size < data_size) + if (self->input_buffer != NULL + && self->input_buffer_size < data_size) { PyMem_Free(self->input_buffer); self->input_buffer = NULL; @@ -565,7 +566,7 @@ _zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_DCtx instance."); @@ -717,9 +718,10 @@ PyDoc_STRVAR(ZstdDecompressor_eof_doc, "after that, an EOFError exception will be raised."); PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, -"If the max_length output limit in .decompress() method has been reached, and\n" -"the decompressor has (or may has) unconsumed input data, it will be set to\n" -"False. In this case, pass b'' to .decompress() method may output further data."); +"If the max_length output limit in .decompress() method has been reached,\n" +"and the decompressor has (or may has) unconsumed input data, it will be set\n" +"to False. In this case, passing b'' to the .decompress() method may output\n" +"further data."); static PyMemberDef ZstdDecompressor_members[] = { {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 39828c9b36b5c2..e3e9e5d064515f 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -74,13 +74,15 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, at least 8 bytes */ if (Py_SIZE(self->dict_content) < 8) { PyErr_SetString(PyExc_ValueError, - "Zstandard dictionary content should at least 8 bytes."); + "Zstandard dictionary content should at least " + "8 bytes."); goto error; } /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); + self->dict_id = ZSTD_getDictID_fromDict( + PyBytes_AS_STRING(self->dict_content), + Py_SIZE(self->dict_content)); /* Check validity for ordinary dictionary */ if (!is_raw && self->dict_id == 0) { @@ -141,8 +143,10 @@ ZstdDict_str(PyObject *ob) } static PyMemberDef ZstdDict_members[] = { - {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, + {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, + ZstdDict_dictid_doc}, + {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), + Py_READONLY, ZstdDict_dictcontent_doc}, {NULL} }; @@ -152,7 +156,9 @@ _zstd.ZstdDict.as_digested_dict Load as a digested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + 1. Some advanced compression parameters of compressor may be overridden by parameters of digested dictionary. 2. ZstdDict has a digested dictionaries cache for each compression level. @@ -163,7 +169,7 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=10cd2b6165931b77]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } @@ -174,7 +180,9 @@ _zstd.ZstdDict.as_undigested_dict Load as an undigested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + 1. The advanced compression parameters of compressor will not be overridden. 2. Loading an undigested dictionary is costly. If load an undigested dictionary multiple times, consider reusing a compressor object. @@ -183,7 +191,7 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=11e5f5df690a85b4]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } @@ -194,7 +202,9 @@ _zstd.ZstdDict.as_prefix Load as a prefix to compressor/decompressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + 1. Prefix is compatible with long distance matching, while dictionary is not. 2. It only works for the first frame, then the compressor/decompressor will return to no prefix state. @@ -203,7 +213,7 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=b028e0ae6ec4292b]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } From 47f1161d3a2bec52b5b5e952150141709c247da2 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 23 May 2025 21:57:13 -0500 Subject: [PATCH 326/989] gh-128840: Limit the number of parts in IPv6 address parsing (GH-128841) GH-128840: Limit the number of parts in IPv6 address parsing Limit length of IP address string to 39 --------- Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Serhiy Storchaka Co-authored-by: Gregory P. Smith --- Lib/ipaddress.py | 11 +++++++++-- Lib/test/test_ipaddress.py | 11 +++++++++++ .../2025-01-14-11-19-07.gh-issue-128840.M1doZW.rst | 2 ++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-01-14-11-19-07.gh-issue-128840.M1doZW.rst diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index d8a84f33264dc5..69e933a6541f78 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -1660,8 +1660,16 @@ def _ip_int_from_string(cls, ip_str): """ if not ip_str: raise AddressValueError('Address cannot be empty') + if len(ip_str) > 39: + msg = ("At most 39 characters expected in " + f"{ip_str[:14]!r}({len(ip_str)-28} chars elided){ip_str[-14:]!r}") + raise AddressValueError(msg) - parts = ip_str.split(':') + # We want to allow more parts than the max to be 'split' + # to preserve the correct error message when there are + # too many parts combined with '::' + _max_parts = cls._HEXTET_COUNT + 1 + parts = ip_str.split(':', maxsplit=_max_parts) # An IPv6 address needs at least 2 colons (3 parts). _min_parts = 3 @@ -1681,7 +1689,6 @@ def _ip_int_from_string(cls, ip_str): # An IPv6 address can't have more than 8 colons (9 parts). # The extra colon comes from using the "::" notation for a single # leading or trailing zero part. - _max_parts = cls._HEXTET_COUNT + 1 if len(parts) > _max_parts: msg = "At most %d colons permitted in %r" % (_max_parts-1, ip_str) raise AddressValueError(msg) diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index a06608d0016253..ee95454e64bb0d 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -397,6 +397,17 @@ def assertBadSplit(addr): # A trailing IPv4 address is two parts assertBadSplit("10:9:8:7:6:5:4:3:42.42.42.42%scope") + def test_bad_address_split_v6_too_long(self): + def assertBadSplit(addr): + msg = r"At most 39 characters expected in %s" + with self.assertAddressError(msg, repr(re.escape(addr[:14]))): + ipaddress.IPv6Address(addr) + + # Long IPv6 address + long_addr = ("0:" * 10000) + "0" + assertBadSplit(long_addr) + assertBadSplit(long_addr + "%zoneid") + def test_bad_address_split_v6_too_many_parts(self): def assertBadSplit(addr): msg = "Exactly 8 parts expected without '::' in %r" diff --git a/Misc/NEWS.d/next/Security/2025-01-14-11-19-07.gh-issue-128840.M1doZW.rst b/Misc/NEWS.d/next/Security/2025-01-14-11-19-07.gh-issue-128840.M1doZW.rst new file mode 100644 index 00000000000000..b57ec3e70dcc5f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-01-14-11-19-07.gh-issue-128840.M1doZW.rst @@ -0,0 +1,2 @@ +Short-circuit the processing of long IPv6 addresses early in :mod:`ipaddress` to prevent excessive +memory consumption and a minor denial-of-service. From 74a9c60f3ee2195e506144c3811090f1334c996b Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 23 May 2025 23:07:39 -0400 Subject: [PATCH 327/989] gh-134546: ensure remote pdb script is readable (#134552) --- Lib/pdb.py | 3 +++ .../Library/2025-05-22-18-14-13.gh-issue-134546.fjLVzK.rst | 1 + 2 files changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-22-18-14-13.gh-issue-134546.fjLVzK.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 78ee35f61bbcea..fc83728fb6dc94 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -75,6 +75,7 @@ import code import glob import json +import stat import token import types import atexit @@ -3418,6 +3419,8 @@ def attach(pid, commands=()): ) ) connect_script.close() + orig_mode = os.stat(connect_script.name).st_mode + os.chmod(connect_script.name, orig_mode | stat.S_IROTH | stat.S_IRGRP) sys.remote_exec(pid, connect_script.name) # TODO Add a timeout? Or don't bother since the user can ^C? diff --git a/Misc/NEWS.d/next/Library/2025-05-22-18-14-13.gh-issue-134546.fjLVzK.rst b/Misc/NEWS.d/next/Library/2025-05-22-18-14-13.gh-issue-134546.fjLVzK.rst new file mode 100644 index 00000000000000..eea897f59181c9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-22-18-14-13.gh-issue-134546.fjLVzK.rst @@ -0,0 +1 @@ +Ensure :mod:`pdb` remote debugging script is readable by remote Python process. From 80284b5c5eebd0e603c38322f94a97a2853ceeba Mon Sep 17 00:00:00 2001 From: Eddy Mulyono Date: Fri, 23 May 2025 23:50:19 -0400 Subject: [PATCH 328/989] gh-80334: fix multiprocessing.freeze_support for other spawn platforms (GH-134462) Doc/library/multiprocessing.rst: freeze_support: Change to specify spawn method instead of platform Have multiprocessing.freeze_support() enable on spawn, not just win32. --------- Co-authored-by: Gregory P. Smith --- Doc/library/multiprocessing.rst | 10 +++++----- Lib/multiprocessing/context.py | 2 +- .../2025-05-24-03-10-36.gh-issue-80334.z21cMa.rst | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-24-03-10-36.gh-issue-80334.z21cMa.rst diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 80e33c4a1df015..fc3c1134f97c85 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1081,7 +1081,7 @@ Miscellaneous .. function:: freeze_support() Add support for when a program which uses :mod:`multiprocessing` has been - frozen to produce a Windows executable. (Has been tested with **py2exe**, + frozen to produce an executable. (Has been tested with **py2exe**, **PyInstaller** and **cx_Freeze**.) One needs to call this function straight after the ``if __name__ == @@ -1099,10 +1099,10 @@ Miscellaneous If the ``freeze_support()`` line is omitted then trying to run the frozen executable will raise :exc:`RuntimeError`. - Calling ``freeze_support()`` has no effect when invoked on any operating - system other than Windows. In addition, if the module is being run - normally by the Python interpreter on Windows (the program has not been - frozen), then ``freeze_support()`` has no effect. + Calling ``freeze_support()`` has no effect when the start method is not + *spawn*. In addition, if the module is being run normally by the Python + interpreter (the program has not been frozen), then ``freeze_support()`` + has no effect. .. function:: get_all_start_methods() diff --git a/Lib/multiprocessing/context.py b/Lib/multiprocessing/context.py index d0a3ad00e53ad8..051d567d457928 100644 --- a/Lib/multiprocessing/context.py +++ b/Lib/multiprocessing/context.py @@ -145,7 +145,7 @@ def freeze_support(self): '''Check whether this is a fake forked process in a frozen executable. If so then run code specified by commandline and exit. ''' - if sys.platform == 'win32' and getattr(sys, 'frozen', False): + if self.get_start_method() == 'spawn' and getattr(sys, 'frozen', False): from .spawn import freeze_support freeze_support() diff --git a/Misc/NEWS.d/next/Library/2025-05-24-03-10-36.gh-issue-80334.z21cMa.rst b/Misc/NEWS.d/next/Library/2025-05-24-03-10-36.gh-issue-80334.z21cMa.rst new file mode 100644 index 00000000000000..228429516db416 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-24-03-10-36.gh-issue-80334.z21cMa.rst @@ -0,0 +1,2 @@ +:func:`multiprocessing.freeze_support` now checks for work on any "spawn" +start method platform rather than only on Windows. From 7b1010a57db9405f277139abef4016ba987b75fc Mon Sep 17 00:00:00 2001 From: Jasper Wong <111284156+3willows@users.noreply.github.com> Date: Sat, 24 May 2025 08:15:00 +0100 Subject: [PATCH 329/989] gh-134595: Update HOWTO to reflect change in CIBW option (#134598) --- Doc/howto/free-threading-extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 5a3970f15d52d6..175bb5dc831848 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -396,7 +396,7 @@ The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. free-threaded build, with the ``t`` suffix, such as ``python3.13t``. * `pypa/cibuildwheel `_ supports the free-threaded build if you set - `CIBW_FREE_THREADED_SUPPORT `_. + `CIBW_ENABLE to cpython-freethreading `_. Limited C API and Stable ABI ............................ From 5d9c8fe3f6168785cb608dddd3010042f39bb226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 24 May 2025 13:48:50 +0200 Subject: [PATCH 330/989] gh-131178: add E2E mockless tests for `http.server` command-line interface (#134279) --- Lib/test/test_httpservers.py | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 0af1c45ecb2a8b..2548a7c5f292f0 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -21,6 +21,7 @@ import html import http, http.client import urllib.parse +import urllib.request import tempfile import time import datetime @@ -33,6 +34,8 @@ from test.support import ( is_apple, import_helper, os_helper, threading_helper ) +from test.support.script_helper import kill_python, spawn_python +from test.support.socket_helper import find_unused_port try: import ssl @@ -1452,6 +1455,73 @@ def test_unknown_flag(self, _): self.assertIn('error', stderr.getvalue()) +class CommandLineRunTimeTestCase(unittest.TestCase): + served_data = os.urandom(32) + served_filename = 'served_filename' + tls_cert = certdata_file('ssl_cert.pem') + tls_key = certdata_file('ssl_key.pem') + tls_password = b'somepass' + tls_password_file = 'ssl_key_password' + + def setUp(self): + super().setUp() + server_dir_context = os_helper.temp_cwd() + server_dir = self.enterContext(server_dir_context) + with open(self.served_filename, 'wb') as f: + f.write(self.served_data) + with open(self.tls_password_file, 'wb') as f: + f.write(self.tls_password) + + def fetch_file(self, path, context=None): + req = urllib.request.Request(path, method='GET') + with urllib.request.urlopen(req, context=context) as res: + return res.read() + + def parse_cli_output(self, output): + match = re.search(r'Serving (HTTP|HTTPS) on (.+) port (\d+)', output) + if match is None: + return None, None, None + return match.group(1).lower(), match.group(2), int(match.group(3)) + + def wait_for_server(self, proc, protocol, bind, port): + """Check that the server has been successfully started.""" + line = proc.stdout.readline().strip() + if support.verbose: + print() + print('python -m http.server: ', line) + return self.parse_cli_output(line) == (protocol, bind, port) + + def test_http_client(self): + bind, port = '127.0.0.1', find_unused_port() + proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, + bufsize=1, text=True) + self.addCleanup(kill_python, proc) + self.addCleanup(proc.terminate) + self.assertTrue(self.wait_for_server(proc, 'http', bind, port)) + res = self.fetch_file(f'http://{bind}:{port}/{self.served_filename}') + self.assertEqual(res, self.served_data) + + @unittest.skipIf(ssl is None, "requires ssl") + def test_https_client(self): + context = ssl.create_default_context() + # allow self-signed certificates + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + + bind, port = '127.0.0.1', find_unused_port() + proc = spawn_python('-u', '-m', 'http.server', str(port), '-b', bind, + '--tls-cert', self.tls_cert, + '--tls-key', self.tls_key, + '--tls-password-file', self.tls_password_file, + bufsize=1, text=True) + self.addCleanup(kill_python, proc) + self.addCleanup(proc.terminate) + self.assertTrue(self.wait_for_server(proc, 'https', bind, port)) + url = f'https://{bind}:{port}/{self.served_filename}' + res = self.fetch_file(url, context=context) + self.assertEqual(res, self.served_data) + + def setUpModule(): unittest.addModuleCleanup(os.chdir, os.getcwd()) From 2fd09b011031f3c00c342b44e02e2817010e507c Mon Sep 17 00:00:00 2001 From: ggqlq <124190229+ggqlq@users.noreply.github.com> Date: Sat, 24 May 2025 20:19:20 +0800 Subject: [PATCH 331/989] gh-134168: fix `http.server` CLI support for IPv6 and `--directory` when serving over HTTPS (#134169) --- Lib/http/server.py | 15 +++++++++++---- ...2025-05-18-13-23-29.gh-issue-134168.hgx3Xg.rst | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-13-23-29.gh-issue-134168.hgx3Xg.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index f6d1b998f4201e..ef10d185932633 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -980,8 +980,8 @@ def test(HandlerClass=BaseHTTPRequestHandler, HandlerClass.protocol_version = protocol if tls_cert: - server = ThreadingHTTPSServer(addr, HandlerClass, certfile=tls_cert, - keyfile=tls_key, password=tls_password) + server = ServerClass(addr, HandlerClass, certfile=tls_cert, + keyfile=tls_key, password=tls_password) else: server = ServerClass(addr, HandlerClass) @@ -1041,7 +1041,7 @@ def _main(args=None): parser.error(f"Failed to read TLS password file: {e}") # ensure dual-stack is not disabled; ref #38907 - class DualStackServer(ThreadingHTTPServer): + class DualStackServerMixin: def server_bind(self): # suppress exception when protocol is IPv4 @@ -1054,9 +1054,16 @@ def finish_request(self, request, client_address): self.RequestHandlerClass(request, client_address, self, directory=args.directory) + class HTTPDualStackServer(DualStackServerMixin, ThreadingHTTPServer): + pass + class HTTPSDualStackServer(DualStackServerMixin, ThreadingHTTPSServer): + pass + + ServerClass = HTTPSDualStackServer if args.tls_cert else HTTPDualStackServer + test( HandlerClass=SimpleHTTPRequestHandler, - ServerClass=DualStackServer, + ServerClass=ServerClass, port=args.port, bind=args.bind, protocol=args.protocol, diff --git a/Misc/NEWS.d/next/Library/2025-05-18-13-23-29.gh-issue-134168.hgx3Xg.rst b/Misc/NEWS.d/next/Library/2025-05-18-13-23-29.gh-issue-134168.hgx3Xg.rst new file mode 100644 index 00000000000000..5a0e20005db11e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-13-23-29.gh-issue-134168.hgx3Xg.rst @@ -0,0 +1,2 @@ +:mod:`http.server`: Fix IPv6 address binding and +:option:`--directory ` handling when using HTTPS. From 91b48868a82821c1cbfd2245212699b57d3a24c7 Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Sun, 25 May 2025 15:17:43 +0200 Subject: [PATCH 332/989] GH-130328: Speedup pasting in legacy console on Windows (gh-133728) --- Lib/_pyrepl/commands.py | 8 +++++- Lib/_pyrepl/windows_console.py | 26 +++++++++++-------- Lib/test/test_pyrepl/test_unix_console.py | 1 + Lib/test/test_pyrepl/test_windows_console.py | 1 + ...-05-09-09-10-34.gh-issue-130328.s9h4By.rst | 2 ++ 5 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-09-09-10-34.gh-issue-130328.s9h4By.rst diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2354fbb2ec2c1e..50c824995d85b8 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -370,6 +370,13 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) + if r.paste_mode: + data = "" + ev = r.console.getpending() + data += ev.data + if data: + r.insert(data) + r.last_refresh_cache.invalidated = True class insert_nl(EditCommand): @@ -484,7 +491,6 @@ def do(self) -> None: data = "" start = time.time() while done not in data: - self.reader.console.wait(100) ev = self.reader.console.getpending() data += ev.data trace( diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 95749198b3b2f9..c56dcd6d7dd434 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -419,10 +419,7 @@ def _getscrollbacksize(self) -> int: return info.srWindow.Bottom # type: ignore[no-any-return] - def _read_input(self, block: bool = True) -> INPUT_RECORD | None: - if not block and not self.wait(timeout=0): - return None - + def _read_input(self) -> INPUT_RECORD | None: rec = INPUT_RECORD() read = DWORD() if not ReadConsoleInput(InHandle, rec, 1, read): @@ -431,14 +428,10 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec def _read_input_bulk( - self, block: bool, n: int + self, n: int ) -> tuple[ctypes.Array[INPUT_RECORD], int]: rec = (n * INPUT_RECORD)() read = DWORD() - - if not block and not self.wait(timeout=0): - return rec, 0 - if not ReadConsoleInput(InHandle, rec, n, read): raise WinError(GetLastError()) @@ -449,8 +442,11 @@ def get_event(self, block: bool = True) -> Event | None: and there is no event pending, otherwise waits for the completion of an event.""" + if not block and not self.wait(timeout=0): + return None + while self.event_queue.empty(): - rec = self._read_input(block) + rec = self._read_input() if rec is None: return None @@ -551,12 +547,20 @@ def getpending(self) -> Event: if e2: e.data += e2.data - recs, rec_count = self._read_input_bulk(False, 1024) + recs, rec_count = self._read_input_bulk(1024) for i in range(rec_count): rec = recs[i] + # In case of a legacy console, we do not only receive a keydown + # event, but also a keyup event - and for uppercase letters + # an additional SHIFT_PRESSED event. if rec and rec.EventType == KEY_EVENT: key_event = rec.Event.KeyEvent + if not key_event.bKeyDown: + continue ch = key_event.uChar.UnicodeChar + if ch == "\x00": + # ignore SHIFT_PRESSED and special keys + continue if ch == "\r": ch += "\n" e.data += ch diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index c447b310c49a06..b3f7dc028fe210 100644 --- a/Lib/test/test_pyrepl/test_unix_console.py +++ b/Lib/test/test_pyrepl/test_unix_console.py @@ -20,6 +20,7 @@ def unix_console(events, **kwargs): console = UnixConsole() console.get_event = MagicMock(side_effect=events) + console.getpending = MagicMock(return_value=Event("key", "")) height = kwargs.get("height", 25) width = kwargs.get("width", 80) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index e7bab226b31ddf..a52ae96a83ddde 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -35,6 +35,7 @@ class WindowsConsoleTests(TestCase): def console(self, events, **kwargs) -> Console: console = WindowsConsole() console.get_event = MagicMock(side_effect=events) + console.getpending = MagicMock(return_value=Event("key", "")) console.wait = MagicMock() console._scroll = MagicMock() console._hide_cursor = MagicMock() diff --git a/Misc/NEWS.d/next/Library/2025-05-09-09-10-34.gh-issue-130328.s9h4By.rst b/Misc/NEWS.d/next/Library/2025-05-09-09-10-34.gh-issue-130328.s9h4By.rst new file mode 100644 index 00000000000000..00b556c6a336ee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-09-09-10-34.gh-issue-130328.s9h4By.rst @@ -0,0 +1,2 @@ +Speedup pasting in ``PyREPL`` on Windows in a legacy console. Patch by Chris +Eibl. From 4443110c3409ecba9f0fd49495d039030cb7ed58 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 25 May 2025 08:38:18 -0700 Subject: [PATCH 333/989] gh-133778: Fix setting `__annotations__` under PEP 563 (#133794) --- Lib/test/test_type_annotations.py | 22 +++++++++ ...-05-09-18-11-21.gh-issue-133778.pWEV3t.rst | 2 + Objects/typeobject.c | 45 +++++++++++++++---- 3 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-18-11-21.gh-issue-133778.pWEV3t.rst diff --git a/Lib/test/test_type_annotations.py b/Lib/test/test_type_annotations.py index 2c886bb6d362fa..c66cb058552643 100644 --- a/Lib/test/test_type_annotations.py +++ b/Lib/test/test_type_annotations.py @@ -498,6 +498,28 @@ def f(x: int) -> int: pass self.assertEqual(f.__annotate__(annotationlib.Format.VALUE), annos) self.assertEqual(f.__annotations__, annos) + def test_set_annotations(self): + function_code = textwrap.dedent(""" + def f(x: int): + pass + """) + class_code = textwrap.dedent(""" + class f: + x: int + """) + for future in (False, True): + for label, code in (("function", function_code), ("class", class_code)): + with self.subTest(future=future, label=label): + if future: + code = "from __future__ import annotations\n" + code + ns = run_code(code) + f = ns["f"] + anno = "int" if future else int + self.assertEqual(f.__annotations__, {"x": anno}) + + f.__annotations__ = {"x": str} + self.assertEqual(f.__annotations__, {"x": str}) + def test_name_clash_with_format(self): # this test would fail if __annotate__'s parameter was called "format" # during symbol table construction diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-18-11-21.gh-issue-133778.pWEV3t.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-18-11-21.gh-issue-133778.pWEV3t.rst new file mode 100644 index 00000000000000..6eb6881213c434 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-18-11-21.gh-issue-133778.pWEV3t.rst @@ -0,0 +1,2 @@ +Fix bug where assigning to the :attr:`~type.__annotations__` attributes of +classes defined under ``from __future__ import annotations`` had no effect. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a7ab69fef4c721..ee09289425b91a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2065,19 +2065,46 @@ type_set_annotations(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)) return -1; } - int result; PyObject *dict = PyType_GetDict(type); - if (value != NULL) { - /* set */ - result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); - } else { - /* delete */ - result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); - if (result == 0) { - PyErr_SetString(PyExc_AttributeError, "__annotations__"); + int result = PyDict_ContainsString(dict, "__annotations__"); + if (result < 0) { + Py_DECREF(dict); + return -1; + } + if (result) { + // If __annotations__ is currently in the dict, we update it, + if (value != NULL) { + result = PyDict_SetItem(dict, &_Py_ID(__annotations__), value); + } else { + result = PyDict_Pop(dict, &_Py_ID(__annotations__), NULL); + if (result == 0) { + // Somebody else just deleted it? + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } + if (result < 0) { Py_DECREF(dict); return -1; } + // Also clear __annotations_cache__ just in case. + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + } + else { + // Else we update only __annotations_cache__. + if (value != NULL) { + /* set */ + result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); + } else { + /* delete */ + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + if (result == 0) { + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } } if (result < 0) { Py_DECREF(dict); From 3e562b394252ff75d9809b7940020a775e4df68b Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 25 May 2025 08:40:58 -0700 Subject: [PATCH 334/989] gh-133684: Fix get_annotations() where PEP 563 is involved (#133795) --- Lib/annotationlib.py | 19 +++++- Lib/test/test_annotationlib.py | 66 ++++++++++++++++++- ...-05-09-18-29-25.gh-issue-133684.Y1DFSt.rst | 3 + 3 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-09-18-29-25.gh-issue-133684.Y1DFSt.rst diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 32b8553458930c..a7dfb91515a1c4 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -1042,14 +1042,27 @@ def _get_and_call_annotate(obj, format): return None +_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ + + def _get_dunder_annotations(obj): """Return the annotations for an object, checking that it is a dictionary. Does not return a fresh dictionary. """ - ann = getattr(obj, "__annotations__", None) - if ann is None: - return None + # This special case is needed to support types defined under + # from __future__ import annotations, where accessing the __annotations__ + # attribute directly might return annotations for the wrong class. + if isinstance(obj, type): + try: + ann = _BASE_GET_ANNOTATIONS(obj) + except AttributeError: + # For static types, the descriptor raises AttributeError. + return None + else: + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None if not isinstance(ann, dict): raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index 73a821d15e3481..fe091e52a86dc4 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -7,7 +7,7 @@ import functools import itertools import pickle -from string.templatelib import Interpolation, Template +from string.templatelib import Template import typing import unittest from annotationlib import ( @@ -815,6 +815,70 @@ def test_stringized_annotations_on_class(self): {"x": int}, ) + def test_stringized_annotation_permutations(self): + def define_class(name, has_future, has_annos, base_text, extra_names=None): + lines = [] + if has_future: + lines.append("from __future__ import annotations") + lines.append(f"class {name}({base_text}):") + if has_annos: + lines.append(f" {name}_attr: int") + else: + lines.append(" pass") + code = "\n".join(lines) + ns = support.run_code(code, extra_names=extra_names) + return ns[name] + + def check_annotations(cls, has_future, has_annos): + if has_annos: + if has_future: + anno = "int" + else: + anno = int + self.assertEqual(get_annotations(cls), {f"{cls.__name__}_attr": anno}) + else: + self.assertEqual(get_annotations(cls), {}) + + for meta_future, base_future, child_future, meta_has_annos, base_has_annos, child_has_annos in itertools.product( + (False, True), + (False, True), + (False, True), + (False, True), + (False, True), + (False, True), + ): + with self.subTest( + meta_future=meta_future, + base_future=base_future, + child_future=child_future, + meta_has_annos=meta_has_annos, + base_has_annos=base_has_annos, + child_has_annos=child_has_annos, + ): + meta = define_class( + "Meta", + has_future=meta_future, + has_annos=meta_has_annos, + base_text="type", + ) + base = define_class( + "Base", + has_future=base_future, + has_annos=base_has_annos, + base_text="metaclass=Meta", + extra_names={"Meta": meta}, + ) + child = define_class( + "Child", + has_future=child_future, + has_annos=child_has_annos, + base_text="Base", + extra_names={"Base": base}, + ) + check_annotations(meta, meta_future, meta_has_annos) + check_annotations(base, base_future, base_has_annos) + check_annotations(child, child_future, child_has_annos) + def test_modify_annotations(self): def f(x: int): pass diff --git a/Misc/NEWS.d/next/Library/2025-05-09-18-29-25.gh-issue-133684.Y1DFSt.rst b/Misc/NEWS.d/next/Library/2025-05-09-18-29-25.gh-issue-133684.Y1DFSt.rst new file mode 100644 index 00000000000000..0cb1bc237a12c9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-09-18-29-25.gh-issue-133684.Y1DFSt.rst @@ -0,0 +1,3 @@ +Fix bug where :func:`annotationlib.get_annotations` would return the wrong +result for certain classes that are part of a class hierarchy where ``from +__future__ import annotations`` is used. From 52509cc94b1a18cb325dbfa7e5f830b32759a903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Simon?= Date: Sun, 25 May 2025 18:23:38 +0200 Subject: [PATCH 335/989] gh-134582: Fix t-strings untokenize() roundtrip removing space between braces (#134603) --- Lib/test/test_tokenize.py | 4 ++++ Lib/tokenize.py | 14 +++++++------- .../2025-05-23-23-43-39.gh-issue-134582.9POq3l.rst | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-23-23-43-39.gh-issue-134582.9POq3l.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 2d41a5e5ac0697..e6b19fe1812d44 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1975,6 +1975,10 @@ def test_roundtrip(self): for case in cases: self.check_roundtrip(case) + self.check_roundtrip(r"t'{ {}}'") + self.check_roundtrip(r"t'{f'{ {}}'}{ {}}'") + self.check_roundtrip(r"f'{t'{ {}}'}{ {}}'") + def test_continuation(self): # Balancing continuation diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 8d01fd7bce41b0..559a7aecbde2d1 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -274,7 +274,7 @@ def compat(self, token, iterable): toks_append = self.tokens.append startline = token[0] in (NEWLINE, NL) prevstring = False - in_fstring = 0 + in_fstring_or_tstring = 0 for tok in _itertools.chain([token], iterable): toknum, tokval = tok[:2] @@ -293,10 +293,10 @@ def compat(self, token, iterable): else: prevstring = False - if toknum == FSTRING_START: - in_fstring += 1 - elif toknum == FSTRING_END: - in_fstring -= 1 + if toknum in {FSTRING_START, TSTRING_START}: + in_fstring_or_tstring += 1 + elif toknum in {FSTRING_END, TSTRING_END}: + in_fstring_or_tstring -= 1 if toknum == INDENT: indents.append(tokval) continue @@ -311,8 +311,8 @@ def compat(self, token, iterable): elif toknum in {FSTRING_MIDDLE, TSTRING_MIDDLE}: tokval = self.escape_brackets(tokval) - # Insert a space between two consecutive brackets if we are in an f-string - if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring: + # Insert a space between two consecutive brackets if we are in an f-string or t-string + if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring_or_tstring: tokval = ' ' + tokval # Insert a space between two consecutive f-strings diff --git a/Misc/NEWS.d/next/Library/2025-05-23-23-43-39.gh-issue-134582.9POq3l.rst b/Misc/NEWS.d/next/Library/2025-05-23-23-43-39.gh-issue-134582.9POq3l.rst new file mode 100644 index 00000000000000..23e1d5891b685f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-23-23-43-39.gh-issue-134582.9POq3l.rst @@ -0,0 +1 @@ +Fix tokenize.untokenize() round-trip errors related to t-strings braces escaping From b51b08a0a5fedde4f74e4cc338b8b5ad9656ad50 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 25 May 2025 10:23:28 -0700 Subject: [PATCH 336/989] annotationlib docs: note that ForwardRef.evaluate eventually defaults to empty globals (#134661) --- Doc/library/annotationlib.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/annotationlib.rst b/Doc/library/annotationlib.rst index 41c9ce479ff0f8..7dfc11449a6cbc 100644 --- a/Doc/library/annotationlib.rst +++ b/Doc/library/annotationlib.rst @@ -211,6 +211,10 @@ Classes means may not have any information about their scope, so passing arguments to this method may be necessary to evaluate them successfully. + If no *owner*, *globals*, *locals*, or *type_params* are provided and the + :class:`~ForwardRef` does not contain information about its origin, + empty globals and locals dictionaries are used. + .. versionadded:: 3.14 From 57fef27cfc2bdfc1e3a65ef8c8a760198d15b14d Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 25 May 2025 10:26:39 -0700 Subject: [PATCH 337/989] gh-133960: Improve typing.evaluate_forward_ref (#133961) As explained in #133960, this removes most of the behavior differences with ForwardRef.evaluate. The remaining difference is about recursive evaluation of forwardrefs; this is practically useful in cases where an annotation refers to a type alias that itself is string-valued. This also improves several edge cases that were previously not handled optimally. For example, the function now takes advantage of the partial evaluation behavior of ForwardRef.evaluate() to evaluate more ForwardRefs in the FORWARDREF format. This also fixes #133959 as a side effect, because the buggy behavior in #133959 derives from evaluate_forward_ref(). --- Doc/library/typing.rst | 15 +-- Lib/test/test_typing.py | 116 +++++++++++++++--- Lib/typing.py | 52 ++++---- ...-05-12-20-38-57.gh-issue-133960.Aee79f.rst | 3 + 4 files changed, 131 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-12-20-38-57.gh-issue-133960.Aee79f.rst diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 54cc3ea3311adf..dd8ea3c364f49a 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3500,20 +3500,11 @@ Introspection helpers Evaluate an :class:`annotationlib.ForwardRef` as a :term:`type hint`. This is similar to calling :meth:`annotationlib.ForwardRef.evaluate`, - but unlike that method, :func:`!evaluate_forward_ref` also: - - * Recursively evaluates forward references nested within the type hint. - * Raises :exc:`TypeError` when it encounters certain objects that are - not valid type hints. - * Replaces type hints that evaluate to :const:`!None` with - :class:`types.NoneType`. - * Supports the :attr:`~annotationlib.Format.FORWARDREF` and - :attr:`~annotationlib.Format.STRING` formats. + but unlike that method, :func:`!evaluate_forward_ref` also + recursively evaluates forward references nested within the type hint. See the documentation for :meth:`annotationlib.ForwardRef.evaluate` for - the meaning of the *owner*, *globals*, *locals*, and *type_params* parameters. - *format* specifies the format of the annotation and is a member of - the :class:`annotationlib.Format` enum. + the meaning of the *owner*, *globals*, *locals*, *type_params*, and *format* parameters. .. versionadded:: 3.14 diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 246be22a0d8ec4..e2b6f459aa24fc 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6859,12 +6859,10 @@ def test_forward_ref_and_final(self): self.assertEqual(hints, {'value': Final}) def test_top_level_class_var(self): - # https://bugs.python.org/issue45166 - with self.assertRaisesRegex( - TypeError, - r'typing.ClassVar\[int\] is not valid as type argument', - ): - get_type_hints(ann_module6) + # This is not meaningful but we don't raise for it. + # https://github.com/python/cpython/issues/133959 + hints = get_type_hints(ann_module6) + self.assertEqual(hints, {'wrong': ClassVar[int]}) def test_get_type_hints_typeddict(self): self.assertEqual(get_type_hints(TotalMovie), {'title': str, 'year': int}) @@ -6967,6 +6965,11 @@ def foo(a: 'Callable[..., T]'): self.assertEqual(get_type_hints(foo, globals(), locals()), {'a': Callable[..., T]}) + def test_special_forms_no_forward(self): + def f(x: ClassVar[int]): + pass + self.assertEqual(get_type_hints(f), {'x': ClassVar[int]}) + def test_special_forms_forward(self): class C: @@ -6982,8 +6985,9 @@ class CF: self.assertEqual(get_type_hints(C, globals())['b'], Final[int]) self.assertEqual(get_type_hints(C, globals())['x'], ClassVar) self.assertEqual(get_type_hints(C, globals())['y'], Final) - with self.assertRaises(TypeError): - get_type_hints(CF, globals()), + lfi = get_type_hints(CF, globals())['b'] + self.assertIs(get_origin(lfi), list) + self.assertEqual(get_args(lfi), (Final[int],)) def test_union_forward_recursion(self): ValueList = List['Value'] @@ -7216,33 +7220,113 @@ class C(Generic[T]): pass class EvaluateForwardRefTests(BaseTestCase): def test_evaluate_forward_ref(self): int_ref = ForwardRef('int') - missing = ForwardRef('missing') + self.assertIs(typing.evaluate_forward_ref(int_ref), int) self.assertIs( typing.evaluate_forward_ref(int_ref, type_params=()), int, ) + self.assertIs( + typing.evaluate_forward_ref(int_ref, format=annotationlib.Format.VALUE), + int, + ) self.assertIs( typing.evaluate_forward_ref( - int_ref, type_params=(), format=annotationlib.Format.FORWARDREF, + int_ref, format=annotationlib.Format.FORWARDREF, ), int, ) + self.assertEqual( + typing.evaluate_forward_ref( + int_ref, format=annotationlib.Format.STRING, + ), + 'int', + ) + + def test_evaluate_forward_ref_undefined(self): + missing = ForwardRef('missing') + with self.assertRaises(NameError): + typing.evaluate_forward_ref(missing) self.assertIs( typing.evaluate_forward_ref( - missing, type_params=(), format=annotationlib.Format.FORWARDREF, + missing, format=annotationlib.Format.FORWARDREF, ), missing, ) self.assertEqual( typing.evaluate_forward_ref( - int_ref, type_params=(), format=annotationlib.Format.STRING, + missing, format=annotationlib.Format.STRING, ), - 'int', + "missing", ) - def test_evaluate_forward_ref_no_type_params(self): - ref = ForwardRef('int') - self.assertIs(typing.evaluate_forward_ref(ref), int) + def test_evaluate_forward_ref_nested(self): + ref = ForwardRef("int | list['str']") + self.assertEqual( + typing.evaluate_forward_ref(ref), + int | list[str], + ) + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.FORWARDREF), + int | list[str], + ) + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.STRING), + "int | list['str']", + ) + + why = ForwardRef('"\'str\'"') + self.assertIs(typing.evaluate_forward_ref(why), str) + + def test_evaluate_forward_ref_none(self): + none_ref = ForwardRef('None') + self.assertIs(typing.evaluate_forward_ref(none_ref), None) + + def test_globals(self): + A = "str" + ref = ForwardRef('list[A]') + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + self.assertEqual( + typing.evaluate_forward_ref(ref, globals={'A': A}), + list[str], + ) + + def test_owner(self): + ref = ForwardRef("A") + + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + + # We default to the globals of `owner`, + # so it no longer raises `NameError` + self.assertIs( + typing.evaluate_forward_ref(ref, owner=Loop), A + ) + + def test_inherited_owner(self): + # owner passed to evaluate_forward_ref + ref = ForwardRef("list['A']") + self.assertEqual( + typing.evaluate_forward_ref(ref, owner=Loop), + list[A], + ) + + # owner set on the ForwardRef + ref = ForwardRef("list['A']", owner=Loop) + self.assertEqual( + typing.evaluate_forward_ref(ref), + list[A], + ) + + def test_partial_evaluation(self): + ref = ForwardRef("list[A]") + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.FORWARDREF), + list[EqualToForwardRef('A')], + ) class CollectionsAbcTests(BaseTestCase): diff --git a/Lib/typing.py b/Lib/typing.py index 98af61be8b0716..ed1dd4fc6413a5 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -956,12 +956,8 @@ def evaluate_forward_ref( """Evaluate a forward reference as a type hint. This is similar to calling the ForwardRef.evaluate() method, - but unlike that method, evaluate_forward_ref() also: - - * Recursively evaluates forward references nested within the type hint. - * Rejects certain objects that are not valid type hints. - * Replaces type hints that evaluate to None with types.NoneType. - * Supports the *FORWARDREF* and *STRING* formats. + but unlike that method, evaluate_forward_ref() also + recursively evaluates forward references nested within the type hint. *forward_ref* must be an instance of ForwardRef. *owner*, if given, should be the object that holds the annotations that the forward reference @@ -981,23 +977,24 @@ def evaluate_forward_ref( if forward_ref.__forward_arg__ in _recursive_guard: return forward_ref - try: - value = forward_ref.evaluate(globals=globals, locals=locals, - type_params=type_params, owner=owner) - except NameError: - if format == _lazy_annotationlib.Format.FORWARDREF: - return forward_ref - else: - raise - - type_ = _type_check( - value, - "Forward references must evaluate to types.", - is_argument=forward_ref.__forward_is_argument__, - allow_special_forms=forward_ref.__forward_is_class__, - ) + if format is None: + format = _lazy_annotationlib.Format.VALUE + value = forward_ref.evaluate(globals=globals, locals=locals, + type_params=type_params, owner=owner, format=format) + + if (isinstance(value, _lazy_annotationlib.ForwardRef) + and format == _lazy_annotationlib.Format.FORWARDREF): + return value + + if isinstance(value, str): + value = _make_forward_ref(value, module=forward_ref.__forward_module__, + owner=owner or forward_ref.__owner__, + is_argument=forward_ref.__forward_is_argument__, + is_class=forward_ref.__forward_is_class__) + if owner is None: + owner = forward_ref.__owner__ return _eval_type( - type_, + value, globals, locals, type_params, @@ -2338,12 +2335,12 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, # This only affects ForwardRefs. base_globals, base_locals = base_locals, base_globals for name, value in ann.items(): - if value is None: - value = type(None) if isinstance(value, str): value = _make_forward_ref(value, is_argument=False, is_class=True) value = _eval_type(value, base_globals, base_locals, base.__type_params__, format=format, owner=obj) + if value is None: + value = type(None) hints[name] = value if include_extras or format == Format.STRING: return hints @@ -2377,8 +2374,6 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, localns = globalns type_params = getattr(obj, "__type_params__", ()) for name, value in hints.items(): - if value is None: - value = type(None) if isinstance(value, str): # class-level forward refs were handled above, this must be either # a module-level annotation or a function argument annotation @@ -2387,7 +2382,10 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, is_argument=not isinstance(obj, types.ModuleType), is_class=False, ) - hints[name] = _eval_type(value, globalns, localns, type_params, format=format, owner=obj) + value = _eval_type(value, globalns, localns, type_params, format=format, owner=obj) + if value is None: + value = type(None) + hints[name] = value return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()} diff --git a/Misc/NEWS.d/next/Library/2025-05-12-20-38-57.gh-issue-133960.Aee79f.rst b/Misc/NEWS.d/next/Library/2025-05-12-20-38-57.gh-issue-133960.Aee79f.rst new file mode 100644 index 00000000000000..66e8483b25bc37 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-12-20-38-57.gh-issue-133960.Aee79f.rst @@ -0,0 +1,3 @@ +Simplify and improve :func:`typing.evaluate_forward_ref`. It now no longer +raises errors on certain invalid types. In several situations, it is now +able to evaluate forward references that were previously unsupported. From 1000283694136ee0538baa6c6b2eee662ee618d4 Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Sun, 25 May 2025 20:17:13 +0200 Subject: [PATCH 338/989] GH-130328: Fix WindowsConsoleGetEventTests after gh-133728 (gh-134660) --- Lib/test/test_pyrepl/test_windows_console.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index a52ae96a83ddde..f9607e02c604ff 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -386,6 +386,7 @@ def get_event(self, input_records, **kwargs) -> Console: self.console._read_input = self.mock self.console._WindowsConsole__vt_support = kwargs.get("vt_support", False) + self.console.wait = MagicMock(return_value=True) event = self.console.get_event(block=False) return event From 24a47155d2172966fab7d56f2bf9181056fba8d0 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sun, 25 May 2025 21:22:52 +0200 Subject: [PATCH 339/989] Fix sphinx-lint warnings (default-role used). (GH-134647) --- Doc/c-api/code.rst | 2 +- Doc/c-api/function.rst | 2 +- Doc/c-api/typeobj.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 6eae24b38fae48..42594f063b0709 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -182,7 +182,7 @@ bound into a function. Type of a code object watcher callback function. If *event* is ``PY_CODE_EVENT_CREATE``, then the callback is invoked - after `co` has been fully initialized. Otherwise, the callback is invoked + after *co* has been fully initialized. Otherwise, the callback is invoked before the destruction of *co* takes place, so the prior state of *co* can be inspected. diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 58792edeed25e3..63b78f677674e9 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -169,7 +169,7 @@ There are a few functions specific to Python functions. unpredictable effects, including infinite recursion. If *event* is ``PyFunction_EVENT_CREATE``, then the callback is invoked - after `func` has been fully initialized. Otherwise, the callback is invoked + after *func* has been fully initialized. Otherwise, the callback is invoked before the modification to *func* takes place, so the prior state of *func* can be inspected. The runtime is permitted to optimize away the creation of function objects when possible. In such cases no event will be emitted. diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 5df0c0fe608e53..91046c0e6f18ae 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -1238,7 +1238,7 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_MANAGED_DICT - This bit indicates that instances of the class have a `~object.__dict__` + This bit indicates that instances of the class have a :attr:`~object.__dict__` attribute, and that the space for the dictionary is managed by the VM. If this flag is set, :c:macro:`Py_TPFLAGS_HAVE_GC` should also be set. From 328a778db8cc6ecadf0964a8e6a1834078b2d0d3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sun, 25 May 2025 23:09:02 +0300 Subject: [PATCH 340/989] gh-134357: Remove unused imports in tests (#134340) --- Lib/test/.ruff.toml | 7 +++++++ Lib/test/support/__init__.py | 3 +-- Lib/test/support/interpreters/channels.py | 4 ++-- Lib/test/support/interpreters/queues.py | 1 - Lib/test/test_capi/test_config.py | 1 - Lib/test/test_codeccallbacks.py | 1 - Lib/test/test_crossinterp.py | 2 -- Lib/test/test_ctypes/_support.py | 1 - Lib/test/test_ctypes/test_byteswap.py | 1 - Lib/test/test_ctypes/test_generated_structs.py | 2 +- Lib/test/test_decimal.py | 1 - Lib/test/test_generated_cases.py | 5 +---- Lib/test/test_genericpath.py | 2 +- Lib/test/test_gzip.py | 1 - Lib/test/test_hashlib.py | 1 - Lib/test/test_hmac.py | 1 - Lib/test/test_idle.py | 2 +- Lib/test/test_interpreters/test_queues.py | 1 - Lib/test/test_interpreters/utils.py | 1 - Lib/test/test_ntpath.py | 3 +-- Lib/test/test_peepholer.py | 2 +- Lib/test/test_pty.py | 1 - Lib/test/test_pydoc/test_pydoc.py | 2 +- Lib/test/test_remote_pdb.py | 9 ++------- Lib/test/test_shutil.py | 2 +- Lib/test/test_string/_support.py | 1 - Lib/test/test_sysconfig.py | 1 - Lib/test/test_threading.py | 2 +- Lib/test/test_tools/i18n_data/docstrings.py | 2 +- Lib/test/test_types.py | 2 +- Lib/test/test_typing.py | 3 +-- Lib/test/test_venv.py | 2 +- Lib/test/test_webbrowser.py | 1 - Lib/test/test_zipfile/__main__.py | 2 +- Lib/test/test_zstd.py | 1 - 35 files changed, 26 insertions(+), 48 deletions(-) diff --git a/Lib/test/.ruff.toml b/Lib/test/.ruff.toml index 7aa8a4785d6844..f1a967203ce4ba 100644 --- a/Lib/test/.ruff.toml +++ b/Lib/test/.ruff.toml @@ -19,5 +19,12 @@ extend-exclude = [ [lint] select = [ + "F401", # Unused import "F811", # Redefinition of unused variable (useful for finding test methods with the same name) ] + +[lint.per-file-ignores] +"*/**/__main__.py" = ["F401"] # Unused import +"test_import/*.py" = ["F401"] # Unused import +"test_importlib/*.py" = ["F401"] # Unused import +"typinganndata/partialexecution/*.py" = ["F401"] # Unused import diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b7cd7940eb15b3..351d832a26d1df 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1101,7 +1101,6 @@ def __init__(self): self.started = False def start(self): - import warnings try: f = open(self.procfile, 'r') except OSError as e: @@ -2728,7 +2727,7 @@ def iter_builtin_types(): # Fall back to making a best-effort guess. if hasattr(object, '__flags__'): # Look for any type object with the Py_TPFLAGS_STATIC_BUILTIN flag set. - import datetime + import datetime # noqa: F401 seen = set() for cls, subs in walk_class_hierarchy(object): if cls in seen: diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/interpreters/channels.py index 7a2bd7d63f808f..b25a17b1aabb93 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/interpreters/channels.py @@ -6,8 +6,8 @@ # aliases: from _interpchannels import ( - ChannelError, ChannelNotFoundError, ChannelClosedError, - ChannelEmptyError, ChannelNotEmptyError, + ChannelError, ChannelNotFoundError, ChannelClosedError, # noqa: F401 + ChannelEmptyError, ChannelNotEmptyError, # noqa: F401 ) from ._crossinterp import ( UNBOUND_ERROR, UNBOUND_REMOVE, diff --git a/Lib/test/support/interpreters/queues.py b/Lib/test/support/interpreters/queues.py index d6a3197d9e0e26..99987f2f6926b0 100644 --- a/Lib/test/support/interpreters/queues.py +++ b/Lib/test/support/interpreters/queues.py @@ -1,6 +1,5 @@ """Cross-interpreter Queues High Level Module.""" -import pickle import queue import time import weakref diff --git a/Lib/test/test_capi/test_config.py b/Lib/test/test_capi/test_config.py index a2d70dd3af482d..04a27de8d84994 100644 --- a/Lib/test/test_capi/test_config.py +++ b/Lib/test/test_capi/test_config.py @@ -3,7 +3,6 @@ """ import os import sys -import sysconfig import types import unittest from test import support diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index a767f67a02cf56..65d54d1004d647 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -2,7 +2,6 @@ import codecs import html.entities import itertools -import re import sys import unicodedata import unittest diff --git a/Lib/test/test_crossinterp.py b/Lib/test/test_crossinterp.py index c54635eaeab3f9..2fa0077a09bbbb 100644 --- a/Lib/test/test_crossinterp.py +++ b/Lib/test/test_crossinterp.py @@ -1,6 +1,4 @@ import contextlib -import importlib -import importlib.util import itertools import sys import types diff --git a/Lib/test/test_ctypes/_support.py b/Lib/test/test_ctypes/_support.py index 946d654a19aff8..700657a4e41f74 100644 --- a/Lib/test/test_ctypes/_support.py +++ b/Lib/test/test_ctypes/_support.py @@ -3,7 +3,6 @@ import ctypes from _ctypes import Structure, Union, _Pointer, Array, _SimpleCData, CFuncPtr import sys -from test import support _CData = Structure.__base__ diff --git a/Lib/test/test_ctypes/test_byteswap.py b/Lib/test/test_ctypes/test_byteswap.py index ea5951603f9324..f14e1aa32e17ab 100644 --- a/Lib/test/test_ctypes/test_byteswap.py +++ b/Lib/test/test_ctypes/test_byteswap.py @@ -1,5 +1,4 @@ import binascii -import ctypes import math import struct import sys diff --git a/Lib/test/test_ctypes/test_generated_structs.py b/Lib/test/test_ctypes/test_generated_structs.py index aa448fad5bbae6..1cb46a82701553 100644 --- a/Lib/test/test_ctypes/test_generated_structs.py +++ b/Lib/test/test_ctypes/test_generated_structs.py @@ -10,7 +10,7 @@ """ import unittest -from test.support import import_helper, verbose +from test.support import import_helper import re from dataclasses import dataclass from functools import cached_property diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 9e298401dc3dcc..c0a1e378583ba8 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -28,7 +28,6 @@ import math import os, sys import operator -import warnings import pickle, copy import unittest import numbers diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index a71ddc01d1c045..37046d8e1c02b7 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1,11 +1,9 @@ import contextlib import os -import re import sys import tempfile import unittest -from io import StringIO from test import support from test import test_tools @@ -31,12 +29,11 @@ def skip_if_different_mount_drives(): test_tools.skip_if_missing("cases_generator") with test_tools.imports_under_tool("cases_generator"): - from analyzer import analyze_forest, StackItem + from analyzer import StackItem from cwriter import CWriter import parser from stack import Local, Stack import tier1_generator - import opcode_metadata_generator import optimizer_generator diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index df07af01fc7540..16c3268fefb034 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -8,7 +8,7 @@ import unittest import warnings from test.support import ( - is_apple, is_emscripten, os_helper, warnings_helper + is_apple, os_helper, warnings_helper ) from test.support.script_helper import assert_python_ok from test.support.os_helper import FakePath diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index ccbacc7c19b6e6..a12ff5662a73db 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -9,7 +9,6 @@ import struct import sys import unittest -import warnings from subprocess import PIPE, Popen from test.support import catch_unraisable_exception from test.support import import_helper diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index de4c8a1670f591..161c7652d7ab11 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -17,7 +17,6 @@ import tempfile import threading import unittest -import warnings from test import support from test.support import _4G, bigmemtest from test.support import hashlib_helper diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index e898644dd8a552..ff6e1bce0ef801 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -21,7 +21,6 @@ import hmac import hashlib import random -import test.support import test.support.hashlib_helper as hashlib_helper import types import unittest diff --git a/Lib/test/test_idle.py b/Lib/test/test_idle.py index 3d8b7ecc0ecb6d..ebf572ac5caac1 100644 --- a/Lib/test/test_idle.py +++ b/Lib/test/test_idle.py @@ -16,7 +16,7 @@ # Unittest.main and test.libregrtest.runtest.runtest_inner # call load_tests, when present here, to discover tests to run. -from idlelib.idle_test import load_tests +from idlelib.idle_test import load_tests # noqa: F401 if __name__ == '__main__': tk.NoDefaultRoot() diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index 64a2db1230d023..757373904d7a43 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -9,7 +9,6 @@ _queues = import_helper.import_module('_interpqueues') from test.support import interpreters from test.support.interpreters import queues, _crossinterp -import test._crossinterp_definitions as defs from .utils import _run_output, TestBase as _TestBase diff --git a/Lib/test/test_interpreters/utils.py b/Lib/test/test_interpreters/utils.py index fc4ad662e03b66..c25e0fb7475e7e 100644 --- a/Lib/test/test_interpreters/utils.py +++ b/Lib/test/test_interpreters/utils.py @@ -12,7 +12,6 @@ import threading import types import unittest -import warnings from test import support diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index f83ef225a6e48e..c3b0bdaebc2329 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -6,8 +6,7 @@ import sys import unittest import warnings -from test.support import cpython_only, os_helper -from test.support import TestFailed, is_emscripten +from test.support import TestFailed, cpython_only, os_helper from test.support.os_helper import FakePath from test import test_genericpath from tempfile import TemporaryFile diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 0a9ba578673b39..f33de3d420ca34 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -12,7 +12,7 @@ from test import support from test.support.bytecode_helper import ( - BytecodeTestCase, CfgOptimizationTestCase, CompilationStepTestCase) + BytecodeTestCase, CfgOptimizationTestCase) def compile_pattern_with_fast_locals(pattern): diff --git a/Lib/test/test_pty.py b/Lib/test/test_pty.py index c1728f5019d042..4836f38c388c05 100644 --- a/Lib/test/test_pty.py +++ b/Lib/test/test_pty.py @@ -20,7 +20,6 @@ import signal import socket import io # readline -import warnings TEST_STRING_1 = b"I wish to buy a fish license.\n" TEST_STRING_2 = b"For my pet fish, Eric.\n" diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 281b24eaa36b80..d1d6f4987def0c 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -553,7 +553,7 @@ class object # of the known subclasses of object. (doc.docclass() used to # fail if HeapType was imported before running this test, like # when running tests sequentially.) - from _testcapi import HeapType + from _testcapi import HeapType # noqa: F401 except ImportError: pass text = doc.docclass(object) diff --git a/Lib/test/test_remote_pdb.py b/Lib/test/test_remote_pdb.py index aef8a6b0129092..a1c50af15f3dd2 100644 --- a/Lib/test/test_remote_pdb.py +++ b/Lib/test/test_remote_pdb.py @@ -1,5 +1,4 @@ import io -import time import itertools import json import os @@ -8,16 +7,13 @@ import socket import subprocess import sys -import tempfile import textwrap -import threading import unittest import unittest.mock from contextlib import closing, contextmanager, redirect_stdout, redirect_stderr, ExitStack -from pathlib import Path from test.support import is_wasi, cpython_only, force_color, requires_subprocess, SHORT_TIMEOUT -from test.support.os_helper import temp_dir, TESTFN, unlink -from typing import Dict, List, Optional, Tuple, Union, Any +from test.support.os_helper import TESTFN, unlink +from typing import List import pdb from pdb import _PdbServer, _PdbClient @@ -1434,7 +1430,6 @@ def test_multi_line_commands(self): def _supports_remote_attaching(): - from contextlib import suppress PROCESS_VM_READV_SUPPORTED = False try: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 62c80aab4b3305..ebb6cf88336249 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3492,7 +3492,7 @@ def test_module_all_attribute(self): target_api.append('disk_usage') self.assertEqual(set(shutil.__all__), set(target_api)) with self.assertWarns(DeprecationWarning): - from shutil import ExecError + from shutil import ExecError # noqa: F401 if __name__ == '__main__': diff --git a/Lib/test/test_string/_support.py b/Lib/test/test_string/_support.py index eaa3354a559246..abdddaf187b4fe 100644 --- a/Lib/test/test_string/_support.py +++ b/Lib/test/test_string/_support.py @@ -1,4 +1,3 @@ -import unittest from string.templatelib import Interpolation diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index d30f69ded6643a..2c0df9376abfc6 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -32,7 +32,6 @@ from sysconfig.__main__ import _main, _parse_makefile, _get_pybuilddir, _get_json_data_name import _imp import _osx_support -import _sysconfig HAS_USER_BASE = sysconfig._HAS_USER_BASE diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 0e51e7fc8c5a76..59b3a749d2fffa 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1253,7 +1253,7 @@ def test_start_new_thread_failed(self): # its state should be removed from interpreter' thread states list # to avoid its double cleanup try: - from resource import setrlimit, RLIMIT_NPROC + from resource import setrlimit, RLIMIT_NPROC # noqa: F401 except ImportError as err: self.skipTest(err) # RLIMIT_NPROC is specific to Linux and BSD code = """if 1: diff --git a/Lib/test/test_tools/i18n_data/docstrings.py b/Lib/test/test_tools/i18n_data/docstrings.py index 151a55a4b56ba6..14559a632da158 100644 --- a/Lib/test/test_tools/i18n_data/docstrings.py +++ b/Lib/test/test_tools/i18n_data/docstrings.py @@ -1,7 +1,7 @@ """Module docstring""" # Test docstring extraction -from gettext import gettext as _ +from gettext import gettext as _ # noqa: F401 # Empty docstring diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 3097c7ddf05901..9011e0e1962820 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2516,7 +2516,7 @@ def setUpClass(cls): from test.support import interpreters except ModuleNotFoundError: raise unittest.SkipTest('subinterpreters required') - import test.support.interpreters.channels + import test.support.interpreters.channels # noqa: F401 @cpython_only @no_rerun('channels (and queues) might have a refleak; see gh-122199') diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index e2b6f459aa24fc..ef02e8202fc829 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -46,11 +46,10 @@ import textwrap import typing import weakref -import warnings import types from test.support import ( - captured_stderr, cpython_only, infinite_recursion, requires_docstrings, import_helper, run_code, + captured_stderr, cpython_only, requires_docstrings, import_helper, run_code, EqualToForwardRef, ) from test.typinganndata import ( diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 12c30e178aeb51..d62f3fba2d1a94 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -1008,7 +1008,7 @@ def do_test_with_pip(self, system_site_packages): err, flags=re.MULTILINE) # Ignore warning about missing optional module: try: - import ssl + import ssl # noqa: F401 except ImportError: err = re.sub( "^WARNING: Disabling truststore since ssl support is missing$", diff --git a/Lib/test/test_webbrowser.py b/Lib/test/test_webbrowser.py index 4c3ea1cd8df13e..6b577ae100e419 100644 --- a/Lib/test/test_webbrowser.py +++ b/Lib/test/test_webbrowser.py @@ -6,7 +6,6 @@ import sys import unittest import webbrowser -from functools import partial from test import support from test.support import import_helper from test.support import is_apple_mobile diff --git a/Lib/test/test_zipfile/__main__.py b/Lib/test/test_zipfile/__main__.py index e25ac946edffe4..90da74ade38c69 100644 --- a/Lib/test/test_zipfile/__main__.py +++ b/Lib/test/test_zipfile/__main__.py @@ -1,6 +1,6 @@ import unittest -from . import load_tests # noqa: F401 +from . import load_tests if __name__ == "__main__": diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 34c7c721b1ad32..bc809603cbc629 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -12,7 +12,6 @@ from test.support.import_helper import import_module from test.support import threading_helper from test.support import _1M -from test.support import Py_GIL_DISABLED _zstd = import_module("_zstd") zstd = import_module("compression.zstd") From 42b25ad4d3d6bcdc28ddfe07d2bf8831378bb0d1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 25 May 2025 21:19:29 +0100 Subject: [PATCH 341/989] gh-91048: Refactor and optimize remote debugging module (#134652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completely refactor Modules/_remote_debugging_module.c with improved code organization, replacing scattered reference counting and error handling with centralized goto error paths. This cleanup improves maintainability and reduces code duplication throughout the module while preserving the same external API. Implement memory page caching optimization in Python/remote_debug.h to avoid repeated reads of the same memory regions during debugging operations. The cache stores previously read memory pages and reuses them for subsequent reads, significantly reducing system calls and improving performance. Add code object caching mechanism with a new code_object_generation field in the interpreter state that tracks when code object caches need invalidation. This allows efficient reuse of parsed code object metadata and eliminates redundant processing of the same code objects across debugging sessions. Optimize memory operations by replacing multiple individual structure copies with single bulk reads for the same data structures. This reduces the number of memory operations and system calls required to gather debugging information from the target process. Update Makefile.pre.in to include Python/remote_debug.h in the headers list, ensuring that changes to the remote debugging header force proper recompilation of dependent modules and maintain build consistency across the codebase. Also, make the module compatible with the free threading build as an extra :) Co-authored-by: Łukasz Langa --- Include/cpython/pystate.h | 2 + Include/internal/pycore_debug_offsets.h | 15 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_interp_structs.h | 6 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Lib/asyncio/tools.py | 8 +- Lib/test/test_external_inspection.py | 82 +- Makefile.pre.in | 1 + Modules/_remote_debugging_module.c | 2951 +++++++++++------ Modules/clinic/_remote_debugging_module.c.h | 243 ++ Objects/codeobject.c | 2 + Python/index_pool.c | 4 + Python/pystate.c | 10 +- Python/remote_debug.h | 117 +- 16 files changed, 2390 insertions(+), 1058 deletions(-) create mode 100644 Modules/clinic/_remote_debugging_module.c.h diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7f1bc363861ddf..54d7e62292966e 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -61,6 +61,8 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; +/* Minimum size of data stack chunk */ +#define _PY_DATA_STACK_CHUNK_SIZE (16*1024) struct _ts { /* See Python/ceval.c for comments explaining most fields */ diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 1a265c59ff8c08..ce3fcb109f49f7 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -54,11 +54,13 @@ extern "C" { # define _Py_Debug_Free_Threaded 1 # define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) # define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index) +# define _Py_Debug_interpreter_state_tlbc_generation offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 # define _Py_Debug_code_object_co_tlbc 0 # define _Py_Debug_interpreter_frame_tlbc_index 0 +# define _Py_Debug_interpreter_state_tlbc_generation 0 #endif @@ -89,6 +91,8 @@ typedef struct _Py_DebugOffsets { uint64_t gil_runtime_state_enabled; uint64_t gil_runtime_state_locked; uint64_t gil_runtime_state_holder; + uint64_t code_object_generation; + uint64_t tlbc_generation; } interpreter_state; // Thread state offset; @@ -216,6 +220,11 @@ typedef struct _Py_DebugOffsets { uint64_t gi_frame_state; } gen_object; + struct _llist_node { + uint64_t next; + uint64_t prev; + } llist_node; + struct _debugger_support { uint64_t eval_breaker; uint64_t remote_debugger_support; @@ -251,6 +260,8 @@ typedef struct _Py_DebugOffsets { .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + .code_object_generation = offsetof(PyInterpreterState, _code_object_generation), \ + .tlbc_generation = _Py_Debug_interpreter_state_tlbc_generation, \ }, \ .thread_state = { \ .size = sizeof(PyThreadState), \ @@ -347,6 +358,10 @@ typedef struct _Py_DebugOffsets { .gi_iframe = offsetof(PyGenObject, gi_iframe), \ .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \ }, \ + .llist_node = { \ + .next = offsetof(struct llist_node, next), \ + .prev = offsetof(struct llist_node, prev), \ + }, \ .debugger_support = { \ .eval_breaker = offsetof(PyThreadState, eval_breaker), \ .remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index d896e870630418..356bcaa7c350a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -795,6 +795,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index a06d7495bab8e7..aebe798031ce4f 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -286,6 +286,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) + STRUCT_FOR_ID(all_threads) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(any) STRUCT_FOR_ID(append) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index c3e6c77405bfe7..8a29c533b99058 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -726,6 +726,10 @@ typedef struct _PyIndexPool { // Next index to allocate if no free indices are available int32_t next_index; + + // Generation counter incremented on thread creation/destruction + // Used for TLBC cache invalidation in remote debugging + uint32_t tlbc_generation; } _PyIndexPool; typedef union _Py_unique_id_entry { @@ -843,6 +847,8 @@ struct _is { /* The per-interpreter GIL, which might not be used. */ struct _gil_runtime_state _gil; + uint64_t _code_object_generation; + /* ---------- IMPORTANT --------------------------- The fields above this line are declared as early as possible to facilitate out-of-process observability diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 83301d8aef7697..0fa1fa5af99a92 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -793,6 +793,7 @@ extern "C" { INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ + INIT_ID(all_threads), \ INIT_ID(allow_code), \ INIT_ID(any), \ INIT_ID(append), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c0f5f2b17f6609..4982c4532afd89 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -932,6 +932,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(all_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(allow_code); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index b2da7d2f6ba10c..3fc4524c008db6 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,11 +1,10 @@ """Tools to analyze tasks running in asyncio programs.""" -from dataclasses import dataclass from collections import defaultdict from itertools import count from enum import Enum import sys -from _remote_debugging import get_all_awaited_by +from _remote_debugging import RemoteUnwinder class NodeType(Enum): @@ -118,6 +117,11 @@ def dfs(v): # ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + def build_async_tree(result, task_emoji="(T)", cor_emoji=""): """ Build a list of strings for pretty-print an async call tree. diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index ad3f669a03043e..291c419066ac5b 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -4,6 +4,7 @@ import importlib import sys import socket +import threading from asyncio import staggered, taskgroups from unittest.mock import ANY from test.support import os_helper, SHORT_TIMEOUT, busy_retry @@ -16,9 +17,7 @@ try: from _remote_debugging import PROCESS_VM_READV_SUPPORTED - from _remote_debugging import get_stack_trace - from _remote_debugging import get_async_stack_trace - from _remote_debugging import get_all_awaited_by + from _remote_debugging import RemoteUnwinder except ImportError: raise unittest.SkipTest("Test only runs when _remote_debugging is available") @@ -34,7 +33,23 @@ def _make_test_script(script_dir, script_basename, source): ) +def get_stack_trace(pid): + unwinder = RemoteUnwinder(pid, all_threads=True) + return unwinder.get_stack_trace() + + +def get_async_stack_trace(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_async_stack_trace() + + +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + class TestGetStackTrace(unittest.TestCase): + maxDiff = None @skip_if_not_supported @unittest.skipIf( @@ -46,7 +61,7 @@ def test_remote_stack_trace(self): port = find_unused_port() script = textwrap.dedent( f"""\ - import time, sys, socket + import time, sys, socket, threading # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) @@ -55,13 +70,16 @@ def bar(): for x in range(100): if x == 50: baz() + def baz(): foo() def foo(): - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready:thread\\n"); time.sleep(10_000) # same line number - bar() + t = threading.Thread(target=bar) + t.start() + sock.sendall(b"ready:main\\n"); t.join() # same line number """ ) stack_trace = None @@ -82,8 +100,9 @@ def foo(): p = subprocess.Popen([sys.executable, script_name]) client_socket, _ = server_socket.accept() server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") + response = b"" + while b"ready:main" not in response or b"ready:thread" not in response: + response += client_socket.recv(1024) stack_trace = get_stack_trace(p.pid) except PermissionError: self.skipTest("Insufficient permissions to read the stack trace") @@ -94,13 +113,23 @@ def foo(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - expected_stack_trace = [ - ("foo", script_name, 14), - ("baz", script_name, 11), + thread_expected_stack_trace = [ + ("foo", script_name, 15), + ("baz", script_name, 12), ("bar", script_name, 9), - ("", script_name, 16), + ('Thread.run', threading.__file__, ANY) ] - self.assertEqual(stack_trace, expected_stack_trace) + # Is possible that there are more threads, so we check that the + # expected stack traces are in the result (looking at you Windows!) + self.assertIn((ANY, thread_expected_stack_trace), stack_trace) + + # Check that the main thread stack trace is in the result + frame = ("", script_name, 19) + for _, stack in stack_trace: + if frame in stack: + break + else: + self.fail("Main thread stack trace not found in result") @skip_if_not_supported @unittest.skipIf( @@ -700,13 +729,28 @@ async def main(): ) def test_self_trace(self): stack_trace = get_stack_trace(os.getpid()) + # Is possible that there are more threads, so we check that the + # expected stack traces are in the result (looking at you Windows!) + this_tread_stack = None + for thread_id, stack in stack_trace: + if thread_id == threading.get_native_id(): + this_tread_stack = stack + break + self.assertIsNotNone(this_tread_stack) self.assertEqual( - stack_trace[0], - ( - "TestGetStackTrace.test_self_trace", - __file__, - self.test_self_trace.__code__.co_firstlineno + 6, - ), + stack[:2], + [ + ( + "get_stack_trace", + __file__, + get_stack_trace.__code__.co_firstlineno + 2, + ), + ( + "TestGetStackTrace.test_self_trace", + __file__, + self.test_self_trace.__code__.co_firstlineno + 6, + ), + ] ) diff --git a/Makefile.pre.in b/Makefile.pre.in index 3ab7c3d6c48ad9..b5703fbe6ae974 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1206,6 +1206,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/unicodeobject.h \ $(srcdir)/Include/warnings.h \ $(srcdir)/Include/weakrefobject.h \ + $(srcdir)/Python/remote_debug.h \ \ pyconfig.h \ $(PARSER_HEADERS) \ diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 8c0f40f835c36e..a13cbd63ad3bd8 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1,5 +1,16 @@ +/****************************************************************************** + * Python Remote Debugging Module + * + * This module provides functionality to debug Python processes remotely by + * reading their memory and reconstructing stack traces and asyncio task states. + ******************************************************************************/ + #define _GNU_SOURCE +/* ============================================================================ + * HEADERS AND INCLUDES + * ============================================================================ */ + #include #include #include @@ -23,6 +34,47 @@ # define HAVE_PROCESS_VM_READV 0 #endif +/* ============================================================================ + * TYPE DEFINITIONS AND STRUCTURES + * ============================================================================ */ + +#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) + +/* Size macros for opaque buffers */ +#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) +#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) +#define SIZEOF_GEN_OBJ sizeof(PyGenObject) +#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) +#define SIZEOF_LLIST_NODE sizeof(struct llist_node) +#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) +#define SIZEOF_PYOBJECT sizeof(PyObject) +#define SIZEOF_SET_OBJ sizeof(PySetObject) +#define SIZEOF_TASK_OBJ 4096 +#define SIZEOF_THREAD_STATE sizeof(PyThreadState) +#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) +#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) +#define SIZEOF_LONG_OBJ sizeof(PyLongObject) + +// Calculate the minimum buffer size needed to read interpreter state fields +// We need to read code_object_generation and potentially tlbc_generation +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifdef Py_GIL_DISABLED +#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)) +#else +#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)) +#endif +#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) + + + +// Copied from Modules/_asynciomodule.c because it's not exported + struct _Py_AsyncioModuleDebugOffsets { struct _asyncio_task_object { uint64_t size; @@ -45,6 +97,127 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; +typedef struct { + PyObject_HEAD + proc_handle_t handle; + uintptr_t runtime_start_address; + struct _Py_DebugOffsets debug_offsets; + int async_debug_offsets_available; + struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; + uintptr_t interpreter_addr; + uintptr_t tstate_addr; + uint64_t code_object_generation; + _Py_hashtable_t *code_object_cache; +#ifdef Py_GIL_DISABLED + // TLBC cache invalidation tracking + uint32_t tlbc_generation; // Track TLBC index pool changes + _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address +#endif +} RemoteUnwinderObject; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +typedef struct { + /* Types */ + PyTypeObject *RemoteDebugging_Type; +} RemoteDebuggingState; + +typedef struct +{ + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + +typedef struct { + uintptr_t remote_addr; + size_t size; + void *local_copy; +} StackChunkInfo; + +typedef struct { + StackChunkInfo *chunks; + size_t count; +} StackChunkList; + +#include "clinic/_remote_debugging_module.c.h" + +/*[clinic input] +module _remote_debugging +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/ + + +/* ============================================================================ + * FORWARD DECLARATIONS + * ============================================================================ */ + +static int +parse_tasks_in_set( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + PyObject *awaited_by, + int recurse_task +); + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to, + int recurse_task +); + +static int +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +); + +/* Forward declarations for task parsing functions */ +static int parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame +); + +/* ============================================================================ + * UTILITY FUNCTIONS AND HELPERS + * ============================================================================ */ + +static void +cached_code_metadata_destroy(void *ptr) +{ + CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; + Py_DECREF(meta->func_name); + Py_DECREF(meta->file_name); + Py_DECREF(meta->linetable); + PyMem_RawFree(meta); +} + +static inline RemoteDebuggingState * +RemoteDebugging_GetState(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (RemoteDebuggingState *)state; +} + +static inline int +RemoteDebugging_InitState(RemoteDebuggingState *st) +{ + return 0; +} + // Helper to chain exceptions and avoid repetitions static void chain_exceptions(PyObject *exception, const char *string) @@ -54,36 +227,14 @@ chain_exceptions(PyObject *exception, const char *string) _PyErr_ChainExceptions1(exc); } -// Get the PyAsyncioDebug section address for any platform -static uintptr_t -_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) -{ - uintptr_t address; - -#ifdef MS_WINDOWS - // On Windows, search for asyncio debug in executable or DLL - address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); -#elif defined(__linux__) - // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); -#elif defined(__APPLE__) && TARGET_OS_OSX - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - if (address == 0) { - PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - } -#else - Py_UNREACHABLE(); -#endif - - return address; -} +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ static inline int read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); if (result < 0) { return -1; } @@ -93,7 +244,7 @@ read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) static inline int read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); if (result < 0) { return -1; } @@ -113,72 +264,53 @@ read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) static int read_char(proc_handle_t *handle, uintptr_t address, char *result) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result); - if (res < 0) { - return -1; - } - return 0; -} - -static int -read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result); - if (res < 0) { - return -1; - } - return 0; -} - -static int -read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result); + int res = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(char), result); if (res < 0) { return -1; } return 0; } -static int -read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr); - if (res < 0) { - return -1; - } - return 0; -} +/* ============================================================================ + * PYTHON OBJECT READING FUNCTIONS + * ============================================================================ */ static PyObject * read_py_str( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, + RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->unicode_object.length, - sizeof(Py_ssize_t), - &len + // Read the entire PyUnicodeObject at once + char unicode_obj[SIZEOF_UNICODE_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_UNICODE_OBJ, + unicode_obj ); if (res < 0) { goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); return NULL; } - size_t offset = debug_offsets->unicode_object.asciiobject_size; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.unicode_object.asciiobject_size; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { goto err; } @@ -202,32 +334,40 @@ read_py_str( static PyObject * read_py_bytes( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, - uintptr_t address + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->bytes_object.ob_size, - sizeof(Py_ssize_t), - &len + // Read the entire PyBytesObject at once + char bytes_obj[SIZEOF_BYTES_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_BYTES_OBJ, + bytes_obj ); if (res < 0) { goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); return NULL; } - size_t offset = debug_offsets->bytes_object.ob_sval; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.bytes_object.ob_sval; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { goto err; } @@ -249,45 +389,60 @@ read_py_bytes( return NULL; } - - static long -read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) +read_py_long( + RemoteUnwinderObject *unwinder, + uintptr_t address +) { unsigned int shift = PYLONG_BITS_IN_DIGIT; - Py_ssize_t size; - uintptr_t lv_tag; - - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->long_object.lv_tag, - sizeof(uintptr_t), - &lv_tag); + // Read the entire PyLongObject at once + char long_obj[SIZEOF_LONG_OBJ]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + unwinder->debug_offsets.long_object.size, + long_obj); if (bytes_read < 0) { return -1; } + uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); int negative = (lv_tag & 3) == 2; - size = lv_tag >> 3; + Py_ssize_t size = lv_tag >> 3; if (size == 0) { return 0; } - digit *digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - return -1; - } + // If the long object has inline digits, use them directly + digit *digits; + if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + // For small integers, digits are inline in the long_value.ob_digit array + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + return -1; + } + memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + } else { + // For larger integers, we need to read the digits separately + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + return -1; + } - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->long_object.ob_digit, - sizeof(digit) * size, - digits - ); - if (bytes_read < 0) { - goto error; + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address + unwinder->debug_offsets.long_object.ob_digit, + sizeof(digit) * size, + digits + ); + if (bytes_read < 0) { + goto error; + } } long long value = 0; @@ -310,44 +465,115 @@ read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address return -1; } +/* ============================================================================ + * ASYNCIO DEBUG FUNCTIONS + * ============================================================================ */ + +// Get the PyAsyncioDebug section address for any platform +static uintptr_t +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +{ + uintptr_t address; + +#ifdef MS_WINDOWS + // On Windows, search for asyncio debug in executable or DLL + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__linux__) + // On Linux, search for asyncio debug in executable or DLL + address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__APPLE__) && TARGET_OS_OSX + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + PyErr_Clear(); + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + } + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#else + Py_UNREACHABLE(); +#endif + + return address; +} + +static int +read_async_debug( + RemoteUnwinderObject *unwinder +) { + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); + if (!async_debug_addr) { + return -1; + } + + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + return result; +} + +/* ============================================================================ + * ASYNCIO TASK PARSING FUNCTIONS + * ============================================================================ */ + static PyObject * parse_task_name( - proc_handle_t *handle, - _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, + RemoteUnwinderObject *unwinder, uintptr_t task_address ) { - uintptr_t task_name_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_name, - &task_name_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj); + if (err < 0) { return NULL; } - // The task name can be a long or a string so we need to check the type + uintptr_t task_name_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); + task_name_addr &= ~Py_TAG_BITS; - PyObject task_name_obj; - err = read_pyobj( - handle, + // The task name can be a long or a string so we need to check the type + char task_name_obj[SIZEOF_PYOBJECT]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, task_name_addr, - &task_name_obj); - if (err) { + SIZEOF_PYOBJECT, + task_name_obj); + if (err < 0) { return NULL; } - unsigned long flags; - err = read_unsigned_long( - handle, - (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags, - &flags); - if (err) { + // Now read the type object to get the flags + char type_obj[SIZEOF_TYPE_OBJ]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), + SIZEOF_TYPE_OBJ, + type_obj); + if (err < 0) { return NULL; } - if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(handle, offsets, task_name_addr); + if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { + long res = read_py_long(unwinder, task_name_addr); if (res == -1) { chain_exceptions(PyExc_RuntimeError, "Failed to get task name"); return NULL; @@ -355,355 +581,375 @@ parse_task_name( return PyUnicode_FromFormat("Task-%d", res); } - if(!(flags & Py_TPFLAGS_UNICODE_SUBCLASS)) { + if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); return NULL; } return read_py_str( - handle, - offsets, + unwinder, task_name_addr, 255 ); } -static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame -); - -static int -parse_coro_chain( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t coro_address, - PyObject *render_to +static int parse_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *awaited_by, + int recurse_task ) { - assert((void*)coro_address != NULL); - - uintptr_t gen_type_addr; - int err = read_ptr( - handle, - coro_address + offsets->pyobject.ob_type, - &gen_type_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { return -1; } - PyObject* name = NULL; - uintptr_t prev_frame; - if (parse_frame_object( - handle, - &name, - offsets, - coro_address + offsets->gen_object.gi_iframe, - &prev_frame) - < 0) - { - return -1; - } + uintptr_t task_ab_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); + task_ab_addr &= ~Py_TAG_BITS; - if (PyList_Append(render_to, name)) { - Py_DECREF(name); - return -1; + if ((void*)task_ab_addr == NULL) { + return 0; } - Py_DECREF(name); - int8_t gi_frame_state; - err = read_sized_int( - handle, - coro_address + offsets->gen_object.gi_frame_state, - &gi_frame_state, - sizeof(int8_t) - ); - if (err) { - return -1; - } + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); - if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { - char owner; - err = read_char( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.owner, - &owner - ); - if (err) { + if (awaited_by_is_a_set) { + if (parse_tasks_in_set(unwinder, task_ab_addr, awaited_by, recurse_task)) { return -1; } - if (owner != FRAME_OWNED_BY_GENERATOR) { - PyErr_SetString( - PyExc_RuntimeError, - "generator doesn't own its frame \\_o_/"); + } else { + if (parse_task(unwinder, task_ab_addr, awaited_by, recurse_task)) { return -1; } + } + + return 0; +} + +static int +handle_yield_from_frame( + RemoteUnwinderObject *unwinder, + uintptr_t gi_iframe_addr, + uintptr_t gen_type_addr, + PyObject *render_to +) { + // Read the entire interpreter frame at once + char iframe[SIZEOF_INTERP_FRAME]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + gi_iframe_addr, + SIZEOF_INTERP_FRAME, + iframe); + if (err < 0) { + return -1; + } + + if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { + PyErr_SetString( + PyExc_RuntimeError, + "generator doesn't own its frame \\_o_/"); + return -1; + } + + uintptr_t stackpointer_addr = GET_MEMBER(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); + stackpointer_addr &= ~Py_TAG_BITS; - uintptr_t stackpointer_addr; + if ((void*)stackpointer_addr != NULL) { + uintptr_t gi_await_addr; err = read_py_ptr( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.stackpointer, - &stackpointer_addr); + &unwinder->handle, + stackpointer_addr - sizeof(void*), + &gi_await_addr); if (err) { return -1; } - if ((void*)stackpointer_addr != NULL) { - uintptr_t gi_await_addr; - err = read_py_ptr( - handle, - stackpointer_addr - sizeof(void*), - &gi_await_addr); + if ((void*)gi_await_addr != NULL) { + uintptr_t gi_await_addr_type_addr; + err = read_ptr( + &unwinder->handle, + gi_await_addr + unwinder->debug_offsets.pyobject.ob_type, + &gi_await_addr_type_addr); if (err) { return -1; } - if ((void*)gi_await_addr != NULL) { - uintptr_t gi_await_addr_type_addr; - int err = read_ptr( - handle, - gi_await_addr + offsets->pyobject.ob_type, - &gi_await_addr_type_addr); + if (gen_type_addr == gi_await_addr_type_addr) { + /* This needs an explanation. We always start with parsing + native coroutine / generator frames. Ultimately they + are awaiting on something. That something can be + a native coroutine frame or... an iterator. + If it's the latter -- we can't continue building + our chain. So the condition to bail out of this is + to do that when the type of the current coroutine + doesn't match the type of whatever it points to + in its cr_await. + */ + err = parse_coro_chain(unwinder, gi_await_addr, render_to); if (err) { return -1; } - - if (gen_type_addr == gi_await_addr_type_addr) { - /* This needs an explanation. We always start with parsing - native coroutine / generator frames. Ultimately they - are awaiting on something. That something can be - a native coroutine frame or... an iterator. - If it's the latter -- we can't continue building - our chain. So the condition to bail out of this is - to do that when the type of the current coroutine - doesn't match the type of whatever it points to - in its cr_await. - */ - err = parse_coro_chain( - handle, - offsets, - async_offsets, - gi_await_addr, - render_to - ); - if (err) { - return -1; - } - } } } - } return 0; } - static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task -); +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +) { + assert((void*)coro_address != NULL); + + // Read the entire generator object at once + char gen_object[SIZEOF_GEN_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + coro_address, + SIZEOF_GEN_OBJ, + gen_object); + if (err < 0) { + return -1; + } + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); -static int -parse_task( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, + PyObject* name = NULL; + + // Parse the previous frame using the gi_iframe from local copy + uintptr_t prev_frame; + uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &prev_frame) < 0) { + return -1; + } + + if (PyList_Append(render_to, name)) { + Py_DECREF(name); + return -1; + } + Py_DECREF(name); + + if (GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state) == FRAME_SUSPENDED_YIELD_FROM) { + return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); + } + + return 0; +} + +static PyObject* +create_task_result( + RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *render_to, int recurse_task ) { - char is_task; - int err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_is_task, - &is_task); - if (err) { - return -1; - } + PyObject* result = NULL; + PyObject *call_stack = NULL; + PyObject *tn = NULL; + char task_obj[SIZEOF_TASK_OBJ]; + uintptr_t coro_addr; - PyObject* result = PyList_New(0); + result = PyList_New(0); if (result == NULL) { - return -1; + goto error; } - PyObject *call_stack = PyList_New(0); + call_stack = PyList_New(0); if (call_stack == NULL) { - goto err; + goto error; } + if (PyList_Append(result, call_stack)) { - Py_DECREF(call_stack); - goto err; + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(call_stack); + Py_CLEAR(call_stack); - if (is_task) { - PyObject *tn = NULL; - if (recurse_task) { - tn = parse_task_name( - handle, offsets, async_offsets, task_address); - } else { - tn = PyLong_FromUnsignedLongLong(task_address); + if (recurse_task) { + tn = parse_task_name(unwinder, task_address); + } else { + tn = PyLong_FromUnsignedLongLong(task_address); + } + if (tn == NULL) { + goto error; + } + + if (PyList_Append(result, tn)) { + goto error; + } + Py_CLEAR(tn); + + // Parse coroutine chain + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + goto error; + } + + coro_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); + coro_addr &= ~Py_TAG_BITS; + + if ((void*)coro_addr != NULL) { + call_stack = PyList_New(0); + if (call_stack == NULL) { + goto error; } - if (tn == NULL) { - goto err; + + if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { + Py_DECREF(call_stack); + goto error; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto err; + + if (PyList_Reverse(call_stack)) { + Py_DECREF(call_stack); + goto error; } - Py_DECREF(tn); - uintptr_t coro_addr; - err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_coro, - &coro_addr); - if (err) { - goto err; + if (PyList_SetItem(result, 0, call_stack) < 0) { + Py_DECREF(call_stack); + goto error; } + } - if ((void*)coro_addr != NULL) { - err = parse_coro_chain( - handle, - offsets, - async_offsets, - coro_addr, - call_stack - ); - if (err) { - goto err; - } + return result; - if (PyList_Reverse(call_stack)) { - goto err; - } +error: + Py_XDECREF(result); + Py_XDECREF(call_stack); + Py_XDECREF(tn); + return NULL; +} + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to, + int recurse_task +) { + char is_task; + PyObject* result = NULL; + PyObject* awaited_by = NULL; + int err; + + err = read_char( + &unwinder->handle, + task_address + unwinder->async_debug_offsets.asyncio_task_object.task_is_task, + &is_task); + if (err) { + goto error; + } + + if (is_task) { + result = create_task_result(unwinder, task_address, recurse_task); + if (!result) { + goto error; + } + } else { + result = PyList_New(0); + if (result == NULL) { + goto error; } } if (PyList_Append(render_to, result)) { - goto err; + goto error; } if (recurse_task) { - PyObject *awaited_by = PyList_New(0); + awaited_by = PyList_New(0); if (awaited_by == NULL) { - goto err; + goto error; } + if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto err; + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ Py_DECREF(awaited_by); - if (parse_task_awaited_by(handle, offsets, async_offsets, - task_address, awaited_by, 1) - ) { - goto err; + /* awaited_by is borrowed from 'result' to simplify cleanup */ + if (parse_task_awaited_by(unwinder, task_address, awaited_by, 1) < 0) { + // Clear the pointer so the cleanup doesn't try to decref it since + // it's borrowed from 'result' and will be decrefed when result is + // deleted. + awaited_by = NULL; + goto error; } } Py_DECREF(result); return 0; -err: - Py_DECREF(result); +error: + Py_XDECREF(result); + Py_XDECREF(awaited_by); return -1; } static int -parse_tasks_in_set( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t set_addr, +process_set_entry( + RemoteUnwinderObject *unwinder, + uintptr_t table_ptr, PyObject *awaited_by, int recurse_task ) { - uintptr_t set_obj; - if (read_py_ptr( - handle, - set_addr, - &set_obj) - ) { + uintptr_t key_addr; + if (read_py_ptr(&unwinder->handle, table_ptr, &key_addr)) { return -1; } - Py_ssize_t num_els; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.used, - &num_els) - ) { - return -1; - } + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(&unwinder->handle, table_ptr, &ref_cnt)) { + return -1; + } - Py_ssize_t set_len; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.mask, - &set_len) - ) { - return -1; + if (ref_cnt) { + // if 'ref_cnt=0' it's a set dummy marker + if (parse_task(unwinder, key_addr, awaited_by, recurse_task)) { + return -1; + } + return 1; // Successfully processed a valid entry + } } - set_len++; // The set contains the `mask+1` element slots. + return 0; // Entry was NULL or dummy marker +} - uintptr_t table_ptr; - if (read_ptr( - handle, - set_obj + offsets->set_object.table, - &table_ptr) - ) { +static int +parse_tasks_in_set( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + PyObject *awaited_by, + int recurse_task +) { + char set_object[SIZEOF_SET_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + set_addr, + SIZEOF_SET_OBJ, + set_object); + if (err < 0) { return -1; } + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; // The set contains the `mask+1` element slots. + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + Py_ssize_t i = 0; Py_ssize_t els = 0; - while (i < set_len) { - uintptr_t key_addr; - if (read_py_ptr(handle, table_ptr, &key_addr)) { + while (i < set_len && els < num_els) { + int result = process_set_entry(unwinder, table_ptr, awaited_by, recurse_task); + + if (result < 0) { return -1; } - - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) { - return -1; - } - - if (ref_cnt) { - // if 'ref_cnt=0' it's a set dummy marker - - if (parse_task( - handle, - offsets, - async_offsets, - key_addr, - awaited_by, - recurse_task - ) - ) { - return -1; - } - - if (++els == num_els) { - break; - } - } + if (result > 0) { + els++; } table_ptr += sizeof(void*) * 2; @@ -714,81 +960,224 @@ parse_tasks_in_set( static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task +setup_async_result_structure(PyObject **result, PyObject **calls) +{ + *result = PyList_New(1); + if (*result == NULL) { + return -1; + } + + *calls = PyList_New(0); + if (*calls == NULL) { + Py_DECREF(*result); + *result = NULL; + return -1; + } + + if (PyList_SetItem(*result, 0, *calls)) { /* steals ref to 'calls' */ + Py_DECREF(*calls); + Py_DECREF(*result); + *result = NULL; + *calls = NULL; + return -1; + } + + return 0; +} + +static int +add_task_info_to_result( + RemoteUnwinderObject *self, + PyObject *result, + uintptr_t running_task_addr ) { - uintptr_t task_ab_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &task_ab_addr); - if (err) { + PyObject *tn = parse_task_name(self, running_task_addr); + if (tn == NULL) { return -1; } - if ((void*)task_ab_addr == NULL) { - return 0; + if (PyList_Append(result, tn)) { + Py_DECREF(tn); + return -1; } + Py_DECREF(tn); - char awaited_by_is_a_set; - err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set, - &awaited_by_is_a_set); - if (err) { + PyObject* awaited_by = PyList_New(0); + if (awaited_by == NULL) { return -1; } - if (awaited_by_is_a_set) { - if (parse_tasks_in_set( - handle, - offsets, - async_offsets, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - awaited_by, - recurse_task - ) - ) { - return -1; - } - } else { - uintptr_t sub_task; - if (read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &sub_task) - ) { - return -1; - } + if (PyList_Append(result, awaited_by)) { + Py_DECREF(awaited_by); + return -1; + } + Py_DECREF(awaited_by); - if (parse_task( - handle, - offsets, - async_offsets, - sub_task, - awaited_by, - recurse_task - ) - ) { - return -1; - } + if (parse_task_awaited_by( + self, running_task_addr, awaited_by, 1) < 0) { + return -1; } return 0; } -typedef struct +static int +process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + goto error; + } + + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + goto error; + } + + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + goto error; + } + + result_item = PyTuple_New(3); + if (result_item == NULL) { + goto error; + } + + PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref + PyTuple_SET_ITEM(result_item, 1, tn); // steals ref + PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref + + // References transferred to tuple + task_id = NULL; + tn = NULL; + current_awaited_by = NULL; + + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + return -1; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyTuple_GET_ITEM(result_item, 2); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) { + return -1; + } + + return 0; + +error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + return -1; +} + +/* ============================================================================ + * TLBC CACHING FUNCTIONS + * ============================================================================ */ + +#ifdef Py_GIL_DISABLED + +typedef struct { + void *tlbc_array; // Local copy of the TLBC array + Py_ssize_t tlbc_array_size; // Size of the TLBC array + uint32_t generation; // Generation when this was cached +} TLBCCacheEntry; + +static void +tlbc_cache_entry_destroy(void *ptr) { - int lineno; - int end_lineno; - int column; - int end_column; -} LocationInfo; + TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; + if (entry->tlbc_array) { + PyMem_RawFree(entry->tlbc_array); + } + PyMem_RawFree(entry); +} + +static TLBCCacheEntry * +get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) +{ + void *key = (void *)code_addr; + TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); + + if (entry && entry->generation != current_generation) { + // Entry is stale, remove it by setting to NULL + _Py_hashtable_set(self->tlbc_cache, key, NULL); + entry = NULL; + } + + return entry; +} + +static int +cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +{ + uintptr_t tlbc_array_ptr; + void *tlbc_array = NULL; + TLBCCacheEntry *entry = NULL; + + // Read the TLBC array pointer + if (read_ptr(&self->handle, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + return 0; // No TLBC array + } + + // Read the TLBC array size + Py_ssize_t tlbc_size; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + return 0; // Invalid size + } + + // Allocate and read the entire TLBC array + size_t array_data_size = tlbc_size * sizeof(void*); + tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); + if (!tlbc_array) { + return -1; // Memory error + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + PyMem_RawFree(tlbc_array); + return 0; // Read error + } + + // Create cache entry + entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); + if (!entry) { + PyMem_RawFree(tlbc_array); + return -1; // Memory error + } + + entry->tlbc_array = tlbc_array; + entry->tlbc_array_size = tlbc_size; + entry->generation = generation; + + // Store in cache + void *key = (void *)code_addr; + if (_Py_hashtable_set(self->tlbc_cache, key, entry) < 0) { + tlbc_cache_entry_destroy(entry); + return -1; // Cache error + } + + return 1; // Success +} + + + +#endif + +/* ============================================================================ + * LINE TABLE PARSING FUNCTIONS + * ============================================================================ */ static int scan_varint(const uint8_t **ptr) @@ -818,7 +1207,6 @@ scan_signed_varint(const uint8_t **ptr) } } - static bool parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) { @@ -863,7 +1251,9 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L } default: { uint8_t second_byte = *(ptr++); - assert((second_byte & 128) == 0); + if ((second_byte & 128) != 0) { + return false; + } info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -877,240 +1267,387 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L return false; } +/* ============================================================================ + * CODE OBJECT AND FRAME PARSING FUNCTIONS + * ============================================================================ */ + static int -read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message) +parse_code_object(RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index) { - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr); - if (bytes_read < 0) { - return -1; + void *key = (void *)address; + CachedCodeMetadata *meta = NULL; + PyObject *func = NULL; + PyObject *file = NULL; + PyObject *linetable = NULL; + PyObject *lineno = NULL; + PyObject *tuple = NULL; + +#ifdef Py_GIL_DISABLED + // In free threading builds, code object addresses might have the low bit set + // as a flag, so we need to mask it off to get the real address + uintptr_t real_address = address & (~1); +#else + uintptr_t real_address = address; +#endif + + if (unwinder && unwinder->code_object_cache != NULL) { + meta = _Py_hashtable_get(unwinder->code_object_cache, key); } - if ((void *)(*out_ptr) == NULL) { - PyErr_SetString(PyExc_RuntimeError, error_message); - return -1; + if (meta == NULL) { + char code_object[SIZEOF_CODE_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) + { + goto error; + } + + func = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); + if (!func) { + goto error; + } + + file = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); + if (!file) { + goto error; + } + + linetable = read_py_bytes(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); + if (!linetable) { + goto error; + } + + meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); + if (!meta) { + goto error; + } + + meta->func_name = func; + meta->file_name = file; + meta->linetable = linetable; + meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); + meta->addr_code_adaptive = real_address + unwinder->debug_offsets.code_object.co_code_adaptive; + + if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { + cached_code_metadata_destroy(meta); + goto error; + } + + // Ownership transferred to meta + func = NULL; + file = NULL; + linetable = NULL; + } + + uintptr_t ip = instruction_pointer; + ptrdiff_t addrq; + +#ifdef Py_GIL_DISABLED + // Handle thread-local bytecode (TLBC) in free threading builds + if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { + // No TLBC or no unwinder - use main bytecode directly + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + goto done_tlbc; + } + + // Try to get TLBC data from cache (we'll get generation from the caller) + TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + + if (!tlbc_entry) { + // Cache miss - try to read and cache TLBC array + if (cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation) > 0) { + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + } + } + + if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + // Use cached TLBC data + uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); + uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; + + if (tlbc_bytecode_addr != 0) { + // Calculate offset from TLBC bytecode + addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; + goto done_tlbc; + } + } + + // Fall back to main bytecode + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + +done_tlbc: +#else + // Non-free-threaded build, always use the main bytecode + (void)tlbc_index; // Suppress unused parameter warning + (void)unwinder; // Suppress unused parameter warning + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; +#endif + ; // Empty statement to avoid C23 extension warning + LocationInfo info = {0}; + bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + meta->first_lineno, &info); + if (!ok) { + info.lineno = -1; + } + + lineno = PyLong_FromLong(info.lineno); + if (!lineno) { + goto error; + } + + tuple = PyTuple_New(3); + if (!tuple) { + goto error; } + Py_INCREF(meta->func_name); + Py_INCREF(meta->file_name); + PyTuple_SET_ITEM(tuple, 0, meta->func_name); + PyTuple_SET_ITEM(tuple, 1, meta->file_name); + PyTuple_SET_ITEM(tuple, 2, lineno); + + *result = tuple; return 0; + +error: + Py_XDECREF(func); + Py_XDECREF(file); + Py_XDECREF(linetable); + Py_XDECREF(lineno); + Py_XDECREF(tuple); + return -1; } -static int -read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets, - uintptr_t current_frame, uintptr_t *instruction_ptr) +/* ============================================================================ + * STACK CHUNK MANAGEMENT FUNCTIONS + * ============================================================================ */ + +static void +cleanup_stack_chunks(StackChunkList *chunks) { - return read_remote_pointer( - handle, - current_frame + offsets->interpreter_frame.instr_ptr, - instruction_ptr, - "No instruction ptr found" - ); + for (size_t i = 0; i < chunks->count; ++i) { + PyMem_RawFree(chunks->chunks[i].local_copy); + } + PyMem_RawFree(chunks->chunks); } static int -parse_code_object(proc_handle_t *handle, - PyObject **result, - struct _Py_DebugOffsets *offsets, - uintptr_t address, - uintptr_t current_frame, - uintptr_t *previous_frame) -{ - uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr; +process_single_stack_chunk( + proc_handle_t *handle, + uintptr_t chunk_addr, + StackChunkInfo *chunk_info +) { + // Start with default size assumption + size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; - if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 || - read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) { + char *this_chunk = PyMem_RawMalloc(current_size); + if (!this_chunk) { + PyErr_NoMemory(); return -1; } - int firstlineno; - if (_Py_RemoteDebug_ReadRemoteMemory(handle, - address + offsets->code_object.firstlineno, - sizeof(int), - &firstlineno) < 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, current_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); return -1; } - PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable); - if (!py_linetable) { - return -1; + // Check actual size and reread if necessary + size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); + if (actual_size != current_size) { + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); + if (!this_chunk) { + PyErr_NoMemory(); + return -1; + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, actual_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + return -1; + } + current_size = actual_size; } - uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive; - ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive; + chunk_info->remote_addr = chunk_addr; + chunk_info->size = current_size; + chunk_info->local_copy = this_chunk; + return 0; +} - LocationInfo info; - parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info); - Py_DECREF(py_linetable); // Done with linetable +static int +copy_stack_chunks(RemoteUnwinderObject *unwinder, + uintptr_t tstate_addr, + StackChunkList *out_chunks) +{ + uintptr_t chunk_addr; + StackChunkInfo *chunks = NULL; + size_t count = 0; + size_t max_chunks = 16; - PyObject *py_line = PyLong_FromLong(info.lineno); - if (!py_line) { + if (read_ptr(&unwinder->handle, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { return -1; } - PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256); - if (!py_func_name) { - Py_DECREF(py_line); + chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); + if (!chunks) { + PyErr_NoMemory(); return -1; } - PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256); - if (!py_file_name) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - return -1; + while (chunk_addr != 0) { + // Grow array if needed + if (count >= max_chunks) { + max_chunks *= 2; + StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); + if (!new_chunks) { + PyErr_NoMemory(); + goto error; + } + chunks = new_chunks; + } + + // Process this chunk + if (process_single_stack_chunk(&unwinder->handle, chunk_addr, &chunks[count]) < 0) { + goto error; + } + + // Get next chunk address and increment count + chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); + count++; } - PyObject *result_tuple = PyTuple_New(3); - if (!result_tuple) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - Py_DECREF(py_file_name); - return -1; + out_chunks->chunks = chunks; + out_chunks->count = count; + return 0; + +error: + for (size_t i = 0; i < count; ++i) { + PyMem_RawFree(chunks[i].local_copy); } + PyMem_RawFree(chunks); + return -1; +} - PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref +static void * +find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) +{ + for (size_t i = 0; i < chunks->count; ++i) { + uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); + size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); - *result = result_tuple; - return 0; + if (remote_ptr >= base && remote_ptr < base + payload) { + return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); + } + } + return NULL; } static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, +parse_frame_from_chunks( + RemoteUnwinderObject *unwinder, + PyObject **result, uintptr_t address, - uintptr_t* previous_frame + uintptr_t *previous_frame, + StackChunkList *chunks ) { - int err; - - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + void *frame_ptr = find_frame_in_chunks(chunks, address); + if (!frame_ptr) { return -1; } - char owner; - if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) { - return -1; - } + char *frame = (char *)frame_ptr; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - if (owner >= FRAME_OWNED_BY_INTERPRETER) { + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER || + !GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable)) { return 0; } - uintptr_t address_of_code_object; - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - &address_of_code_object - ); - if (err) { - return -1; - } + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - if ((void*)address_of_code_object == NULL) { - return 0; + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif return parse_code_object( - handle, result, offsets, address_of_code_object, address, previous_frame); + unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), + instruction_pointer, previous_frame, tlbc_index); } +/* ============================================================================ + * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS + * ============================================================================ */ + static int -parse_async_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame, - uintptr_t* code_object +populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, + uintptr_t runtime_start_address, + uintptr_t *interpreter_state, + uintptr_t *tstate ) { - int err; - - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { - return -1; - } + uint64_t interpreter_state_list_head = + unwinder->debug_offsets.runtime_state.interpreters_head; - char owner; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner); + uintptr_t address_of_interpreter_state; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &address_of_interpreter_state); if (bytes_read < 0) { return -1; } - if (owner == FRAME_OWNED_BY_CSTACK || owner == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } - - if (owner != FRAME_OWNED_BY_GENERATOR - && owner != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); + if (address_of_interpreter_state == 0) { + PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); return -1; } - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - code_object - ); - if (err) { - return -1; - } + *interpreter_state = address_of_interpreter_state; - assert(code_object != NULL); - if ((void*)*code_object == NULL) { + if (all_threads) { + *tstate = 0; return 0; } - if (parse_code_object( - handle, result, offsets, *code_object, address, previous_frame)) { - return -1; - } - - return 1; -} + uintptr_t address_of_thread = address_of_interpreter_state + + unwinder->debug_offsets.interpreter_state.threads_main; -static int -read_async_debug( - proc_handle_t *handle, - struct _Py_AsyncioModuleDebugOffsets* async_debug -) { - uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle); - if (!async_debug_addr) { + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address_of_thread, + sizeof(void*), + tstate) < 0) { return -1; } - size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug); - return result; + return 0; } static int find_running_frame( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, uintptr_t runtime_start_address, - _Py_DebugOffsets* local_debug_offsets, uintptr_t *frame ) { uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); @@ -1124,10 +1661,10 @@ find_running_frame( } uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_main, + unwinder->debug_offsets.interpreter_state.threads_main, sizeof(void*), &address_of_thread); if (bytes_read < 0) { @@ -1137,8 +1674,8 @@ find_running_frame( // No Python frames are available for us (can happen at tear-down). if ((void*)address_of_thread != NULL) { int err = read_ptr( - handle, - address_of_thread + local_debug_offsets->thread_state.current_frame, + &unwinder->handle, + address_of_thread + unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { return -1; @@ -1152,21 +1689,18 @@ find_running_frame( static int find_running_task( - proc_handle_t *handle, - uintptr_t runtime_start_address, - _Py_DebugOffsets *local_debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, + RemoteUnwinderObject *unwinder, uintptr_t *running_task_addr ) { *running_task_addr = (uintptr_t)NULL; uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_address + interpreter_state_list_head, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { @@ -1179,10 +1713,10 @@ find_running_task( } uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_head, + unwinder->debug_offsets.interpreter_state.threads_head, sizeof(void*), &address_of_thread); if (bytes_read < 0) { @@ -1196,9 +1730,9 @@ find_running_task( } bytes_read = read_py_ptr( - handle, + &unwinder->handle, address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_loop, + + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, &address_of_running_loop); if (bytes_read == -1) { return -1; @@ -1210,9 +1744,9 @@ find_running_task( } int err = read_ptr( - handle, + &unwinder->handle, address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_task, + + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, running_task_addr); if (err) { return -1; @@ -1222,579 +1756,936 @@ find_running_task( } static int -append_awaited_by_for_thread( - proc_handle_t *handle, - uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - PyObject *result +find_running_task_and_coro( + RemoteUnwinderObject *self, + uintptr_t *running_task_addr, + uintptr_t *running_coro_addr, + uintptr_t *running_task_code_obj ) { - struct llist_node task_node; - - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - head_addr, - sizeof(task_node), - &task_node)) - { + *running_task_addr = (uintptr_t)NULL; + if (find_running_task( + self, running_task_addr) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); return -1; } - size_t iteration_count = 0; - const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound - while ((uintptr_t)task_node.next != head_addr) { - if (++iteration_count > MAX_ITERATIONS) { - PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); - return -1; - } - - if (task_node.next == NULL) { - PyErr_SetString( - PyExc_RuntimeError, - "Invalid linked list structure reading remote memory"); - return -1; - } - - uintptr_t task_addr = (uintptr_t)task_node.next - - async_offsets->asyncio_task_object.task_node; - - PyObject *tn = parse_task_name( - handle, - debug_offsets, - async_offsets, - task_addr); - if (tn == NULL) { - return -1; - } - - PyObject *current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - Py_DECREF(tn); - return -1; - } - - PyObject* task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - return -1; - } - - PyObject *result_item = PyTuple_New(3); - if (result_item == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - Py_DECREF(task_id); - return -1; - } - - PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref - PyTuple_SET_ITEM(result_item, 1, tn); // steals ref - PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - return -1; - } - Py_DECREF(result_item); - - if (parse_task_awaited_by(handle, debug_offsets, async_offsets, - task_addr, current_awaited_by, 0)) - { - return -1; - } - - // onto the next one... - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - (uintptr_t)task_node.next, - sizeof(task_node), - &task_node)) - { - return -1; - } - } - - return 0; -} - -static int -append_awaited_by( - proc_handle_t *handle, - unsigned long tid, - uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - PyObject *result) -{ - PyObject *tid_py = PyLong_FromUnsignedLong(tid); - if (tid_py == NULL) { + if ((void*)*running_task_addr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "No running task found"); return -1; } - PyObject *result_item = PyTuple_New(2); - if (result_item == NULL) { - Py_DECREF(tid_py); + if (read_py_ptr( + &self->handle, + *running_task_addr + self->async_debug_offsets.asyncio_task_object.task_coro, + running_coro_addr) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); return -1; } - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { - Py_DECREF(tid_py); - Py_DECREF(result_item); + if ((void*)*running_coro_addr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); return -1; } - PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref - PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); + // note: genobject's gi_iframe is an embedded struct so the address to + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + &self->handle, + *running_coro_addr + self->debug_offsets.gen_object.gi_iframe, + running_task_code_obj) < 0) { return -1; } - Py_DECREF(result_item); - if (append_awaited_by_for_thread( - handle, - head_addr, - debug_offsets, - async_offsets, - awaited_by_for_thread)) - { + if ((void*)*running_task_code_obj == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); return -1; } return 0; } -static PyObject* -get_all_awaited_by(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_all_awaited_by is not implemented on this platform"); - return NULL; -#endif - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; +/* ============================================================================ + * FRAME PARSING FUNCTIONS + * ============================================================================ */ + +static int +parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame +) { + char frame[SIZEOF_INTERP_FRAME]; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); + if (bytes_read < 0) { + return -1; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER) { return 0; } - PyObject *result = NULL; - - uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_addr == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + if ((void*)GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable) == NULL) { + return 0; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_addr, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + return parse_code_object( + unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), + instruction_pointer, previous_frame, tlbc_index); +} + +static int +parse_async_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame, + uintptr_t* code_object +) { + char frame[SIZEOF_INTERP_FRAME]; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); + if (bytes_read < 0) { + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame } - uint64_t interpreter_state_list_head = - local_debug_offsets.runtime_state.interpreters_head; + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR + && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + return -1; + } - uintptr_t interpreter_state_addr; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_addr + interpreter_state_list_head, - sizeof(void*), - &interpreter_state_addr)) - { - goto result_err; + *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + // Strip tag bits for consistent comparison + *code_object &= ~Py_TAG_BITS; + + assert(code_object != NULL); + if ((void*)*code_object == NULL) { + return 0; } - uintptr_t thread_state_addr; - unsigned long tid = 0; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - interpreter_state_addr - + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - uintptr_t head_addr; - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr - + local_debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - goto result_err; + if (parse_code_object( + unwinder, result, *code_object, instruction_pointer, previous_frame, tlbc_index)) { + return -1; + } + + return 1; +} + +static int +parse_async_frame_chain( + RemoteUnwinderObject *self, + PyObject *calls, + uintptr_t running_task_code_obj +) { + uintptr_t address_of_current_frame; + if (find_running_frame(self, self->runtime_start_address, &address_of_current_frame) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); + return -1; + } + + uintptr_t address_of_code_object; + while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; + int res = parse_async_frame_object( + self, + &frame_info, + address_of_current_frame, + &address_of_current_frame, + &address_of_code_object + ); + + if (res < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); + return -1; } - head_addr = thread_state_addr - + local_async_debug.asyncio_thread_state.asyncio_tasks_head; + if (!frame_info) { + continue; + } - if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; + if (PyList_Append(calls, frame_info) == -1) { + Py_DECREF(frame_info); + return -1; } - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr + local_debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + Py_DECREF(frame_info); + + if (address_of_code_object == running_task_code_obj) { + break; } } - head_addr = interpreter_state_addr - + local_async_debug.asyncio_interpreter_state.asyncio_tasks_head; + return 0; +} - // On top of a per-thread task lists used by default by asyncio to avoid - // contention, there is also a fallback per-interpreter list of tasks; - // any tasks still pending when a thread is destroyed will be moved to the - // per-interpreter task list. It's unlikely we'll find anything here, but - // interesting for debugging. - if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; +/* ============================================================================ + * AWAITED BY PARSING FUNCTIONS + * ============================================================================ */ + +static int +append_awaited_by_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t head_addr, + PyObject *result +) { + char task_node[SIZEOF_LLIST_NODE]; + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, + sizeof(task_node), task_node) < 0) { + return -1; } - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; + size_t iteration_count = 0; + const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound -result_err: - Py_XDECREF(result); - _Py_RemoteDebug_CleanupProcHandle(handle); - return NULL; -} + while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { + if (++iteration_count > MAX_ITERATIONS) { + PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + return -1; + } -static PyObject* -get_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif + if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid linked list structure reading remote memory"); + return -1; + } - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; - } + uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + - unwinder->async_debug_offsets.asyncio_task_object.task_node; - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; + if (process_single_task_node(unwinder, task_addr, result) < 0) { + return -1; + } + + // Read next node + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + sizeof(task_node), + task_node) < 0) { + return -1; + } } - PyObject* result = NULL; + return 0; +} - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; +static int +append_awaited_by( + RemoteUnwinderObject *unwinder, + unsigned long tid, + uintptr_t head_addr, + PyObject *result) +{ + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + return -1; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + PyObject *result_item = PyTuple_New(2); + if (result_item == NULL) { + Py_DECREF(tid_py); + return -1; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - goto result_err; + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { + Py_DECREF(tid_py); + Py_DECREF(result_item); + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref + PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + return -1; } + Py_DECREF(result_item); - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - if (parse_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame) - < 0) - { - Py_CLEAR(result); - goto result_err; + if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) + { + return -1; + } + + return 0; +} + +/* ============================================================================ + * STACK UNWINDING FUNCTIONS + * ============================================================================ */ + +static int +process_frame_chain( + RemoteUnwinderObject *unwinder, + uintptr_t initial_frame_addr, + StackChunkList *chunks, + PyObject *frame_info +) { + uintptr_t frame_addr = initial_frame_addr; + uintptr_t prev_frame_addr = 0; + const size_t MAX_FRAMES = 1024; + size_t frame_count = 0; + + while ((void*)frame_addr != NULL) { + PyObject *frame = NULL; + uintptr_t next_frame_addr = 0; + + if (++frame_count > MAX_FRAMES) { + PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); + return -1; } - if (!frame_info) { - continue; + // Try chunks first, fallback to direct memory read + if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { + PyErr_Clear(); + if (parse_frame_object(unwinder, &frame, frame_addr, &next_frame_addr) < 0) { + return -1; + } } - if (PyList_Append(result, frame_info) == -1) { - Py_CLEAR(result); - goto result_err; + if (!frame) { + break; } - Py_DECREF(frame_info); - frame_info = NULL; + if (prev_frame_addr && frame_addr != prev_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Broken frame chain: expected frame at 0x%lx, got 0x%lx", + prev_frame_addr, frame_addr); + Py_DECREF(frame); + return -1; + } + if (PyList_Append(frame_info, frame) == -1) { + Py_DECREF(frame); + return -1; + } + Py_DECREF(frame); + + prev_frame_addr = next_frame_addr; + frame_addr = next_frame_addr; } -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; + return 0; } static PyObject* -get_async_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif - int pid; +unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate +) { + PyObject *frame_info = NULL; + PyObject *thread_id = NULL; + PyObject *result = NULL; + StackChunkList chunks = {0}; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; + char ts[SIZEOF_THREAD_STATE]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, *current_tstate, unwinder->debug_offsets.thread_state.size, ts); + if (bytes_read < 0) { + goto error; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); - PyObject *result = NULL; + frame_info = PyList_New(0); + if (!frame_info) { + goto error; + } - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + goto error; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + goto error; } - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); + + thread_id = PyLong_FromLongLong( + GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); + if (thread_id == NULL) { + goto error; } - result = PyList_New(1); + result = PyTuple_New(2); if (result == NULL) { - goto result_err; + goto error; } - PyObject* calls = PyList_New(0); - if (calls == NULL) { - goto result_err; + + PyTuple_SET_ITEM(result, 0, thread_id); // Steals reference + PyTuple_SET_ITEM(result, 1, frame_info); // Steals reference + + cleanup_stack_chunks(&chunks); + return result; + +error: + Py_XDECREF(frame_info); + Py_XDECREF(thread_id); + Py_XDECREF(result); + cleanup_stack_chunks(&chunks); + return NULL; +} + + +/* ============================================================================ + * REMOTEUNWINDER CLASS IMPLEMENTATION + * ============================================================================ */ + +/*[clinic input] +class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/ + +/*[clinic input] +_remote_debugging.RemoteUnwinder.__init__ + pid: int + * + all_threads: bool = False + +Initialize a new RemoteUnwinder object for debugging a remote Python process. + +Args: + pid: Process ID of the target Python process to debug + all_threads: If True, initialize state for all threads in the process. + If False, only initialize for the main thread. + +The RemoteUnwinder provides functionality to inspect and debug a running Python +process, including examining thread states, stack frames and other runtime data. + +Raises: + PermissionError: If access to the target process is denied + OSError: If unable to attach to the target process or access its memory + RuntimeError: If unable to read debug information from the target process +[clinic start generated code]*/ + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads) +/*[clinic end generated code: output=b8027cb247092081 input=6a2056b04e6f050e]*/ +{ + if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { + return -1; } - if (PyList_SetItem(result, 0, calls)) { /* steals ref to 'calls' */ - Py_DECREF(calls); - goto result_err; + + self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); + if (self->runtime_start_address == 0) { + return -1; } - uintptr_t running_task_addr = (uintptr_t)NULL; - if (find_running_task( - handle, runtime_start_address, &local_debug_offsets, &local_async_debug, - &running_task_addr) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); - goto result_err; + if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle, + &self->runtime_start_address, + &self->debug_offsets) < 0) + { + return -1; } - if ((void*)running_task_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No running task found"); - goto result_err; + // Try to read async debug offsets, but don't fail if they're not available + self->async_debug_offsets_available = 1; + if (read_async_debug(self) < 0) { + PyErr_Clear(); + memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets)); + self->async_debug_offsets_available = 0; } - uintptr_t running_coro_addr; - if (read_py_ptr( - handle, - running_task_addr + local_async_debug.asyncio_task_object.task_coro, - &running_coro_addr - )) { - chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); - goto result_err; + if (populate_initial_state_data(all_threads, self, self->runtime_start_address, + &self->interpreter_addr ,&self->tstate_addr) < 0) + { + return -1; } - if ((void*)running_coro_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - goto result_err; + self->code_object_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + cached_code_metadata_destroy, + NULL + ); + if (self->code_object_cache == NULL) { + PyErr_NoMemory(); + return -1; } - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - uintptr_t address_of_running_task_code_obj; - if (read_py_ptr( - handle, - running_coro_addr + local_debug_offsets.gen_object.gi_iframe, - &address_of_running_task_code_obj - )) { - goto result_err; +#ifdef Py_GIL_DISABLED + // Initialize TLBC cache + self->tlbc_generation = 0; + self->tlbc_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + tlbc_cache_entry_destroy, + NULL + ); + if (self->tlbc_cache == NULL) { + _Py_hashtable_destroy(self->code_object_cache); + PyErr_NoMemory(); + return -1; } +#endif - if ((void*)address_of_running_task_code_obj == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); - goto result_err; + return 0; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_stack_trace + +Returns a list of stack traces for all threads in the target process. + +Each element in the returned list is a tuple of (thread_id, frame_list), where: +- thread_id is the OS thread identifier +- frame_list is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for that thread, ordered from most recent to oldest + +Example: + [ + (1234, [ + ('process_data', 'worker.py', 127), + ('run_worker', 'worker.py', 45), + ('main', 'app.py', 23) + ]), + (1235, [ + ('handle_request', 'server.py', 89), + ('serve_forever', 'server.py', 52) + ]) + ] + +Raises: + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/ +{ + PyObject* result = NULL; + // Read interpreter state into opaque buffer + char interp_state_buffer[INTERP_STATE_BUFFER_SIZE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr, + INTERP_STATE_BUFFER_SIZE, + interp_state_buffer) < 0) { + goto exit; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); - goto result_err; + // Get code object generation from buffer + uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer, + self->debug_offsets.interpreter_state.code_object_generation); + + if (code_object_generation != self->code_object_generation) { + self->code_object_generation = code_object_generation; + _Py_hashtable_clear(self->code_object_cache); } - uintptr_t address_of_code_object; - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - int res = parse_async_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame, - &address_of_code_object - ); +#ifdef Py_GIL_DISABLED + // Check TLBC generation and invalidate cache if needed + uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer, + self->debug_offsets.interpreter_state.tlbc_generation); + if (current_tlbc_generation != self->tlbc_generation) { + self->tlbc_generation = current_tlbc_generation; + _Py_hashtable_clear(self->tlbc_cache); + } +#endif - if (res < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); - goto result_err; - } + uintptr_t current_tstate; + if (self->tstate_addr == 0) { + // Get threads head from buffer + current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.threads_head); + } else { + current_tstate = self->tstate_addr; + } + result = PyList_New(0); + if (!result) { + goto exit; + } + + while (current_tstate != 0) { + PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate); if (!frame_info) { - continue; + Py_CLEAR(result); + goto exit; } - if (PyList_Append(calls, frame_info) == -1) { - Py_DECREF(calls); - goto result_err; + if (PyList_Append(result, frame_info) == -1) { + Py_DECREF(frame_info); + Py_CLEAR(result); + goto exit; } - Py_DECREF(frame_info); - frame_info = NULL; - if (address_of_code_object == address_of_running_task_code_obj) { + // We are targeting a single tstate, break here + if (self->tstate_addr) { break; } } - PyObject *tn = parse_task_name( - handle, &local_debug_offsets, &local_async_debug, running_task_addr); - if (tn == NULL) { - goto result_err; +exit: + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_all_awaited_by + +Get all tasks and their awaited_by relationships from the remote process. + +This provides a tree structure showing which tasks are waiting for other tasks. + +For each task, returns: +1. The call stack frames leading to where the task is currently executing +2. The name of the task +3. A list of tasks that this task is waiting for, with their own frames/names/etc + +Returns a list of [frames, task_name, subtasks] where: +- frames: List of (func_name, filename, lineno) showing the call stack +- task_name: String identifier for the task +- subtasks: List of tasks being awaited by this task, in same format + +Raises: + RuntimeError: If AsyncioDebug section is not available in the remote process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output: +[ + # Task c2_root waiting for two subtasks + [ + # Call stack of c2_root + [("c5", "script.py", 10), ("c4", "script.py", 14)], + "c2_root", + [ + # First subtask (sub_main_2) and what it's waiting for + [ + [("c1", "script.py", 23)], + "sub_main_2", + [...] + ], + # Second subtask and its waiters + [...] + ] + ] +] +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6a49cd345e8aec53 input=a452c652bb00701a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + return NULL; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); + + PyObject *result = PyList_New(0); + if (result == NULL) { goto result_err; } - Py_DECREF(tn); - PyObject* awaited_by = PyList_New(0); - if (awaited_by == NULL) { + uintptr_t thread_state_addr; + unsigned long tid = 0; + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr + + self->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { goto result_err; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto result_err; + + uintptr_t head_addr; + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + thread_state_addr + + self->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + goto result_err; + } + + head_addr = thread_state_addr + + self->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + + if (append_awaited_by(self, tid, head_addr, result)) + { + goto result_err; + } + + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + thread_state_addr + self->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + goto result_err; + } } - Py_DECREF(awaited_by); - if (parse_task_awaited_by( - handle, &local_debug_offsets, &local_async_debug, - running_task_addr, awaited_by, 1) - ) { + head_addr = self->interpreter_addr + + self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; + + // On top of a per-thread task lists used by default by asyncio to avoid + // contention, there is also a fallback per-interpreter list of tasks; + // any tasks still pending when a thread is destroyed will be moved to the + // per-interpreter task list. It's unlikely we'll find anything here, but + // interesting for debugging. + if (append_awaited_by(self, 0, head_addr, result)) + { goto result_err; } - _Py_RemoteDebug_CleanupProcHandle(handle); + _Py_RemoteDebug_ClearCache(&self->handle); return result; result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); + _Py_RemoteDebug_ClearCache(&self->handle); Py_XDECREF(result); return NULL; } +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_async_stack_trace -static PyMethodDef methods[] = { - {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given pid"}, - {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, - "Get the asyncio stack from a given pid"}, - {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, - "Get all tasks and their awaited_by from a given pid"}, - {NULL, NULL, 0, NULL}, +Returns information about the currently running async task and its stack trace. + +Returns a tuple of (task_info, stack_frames) where: +- task_info is a tuple of (task_id, task_name) identifying the task +- stack_frames is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for the task, ordered from most recent to oldest + +Example: + ((4345585712, 'Task-1'), [ + ('run_echo_server', 'server.py', 127), + ('serve_forever', 'server.py', 45), + ('main', 'app.py', 23) + ]) + +Raises: + RuntimeError: If AsyncioDebug section is not available in the target process + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6433d52b55e87bbe input=11b7150c59d4c60f]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + return NULL; + } + + PyObject *result = NULL; + PyObject *calls = NULL; + + if (setup_async_result_structure(&result, &calls) < 0) { + goto cleanup; + } + + uintptr_t running_task_addr, running_coro_addr, running_task_code_obj; + if (find_running_task_and_coro(self, &running_task_addr, + &running_coro_addr, &running_task_code_obj) < 0) { + goto cleanup; + } + + if (parse_async_frame_chain(self, calls, running_task_code_obj) < 0) { + goto cleanup; + } + + if (add_task_info_to_result(self, result, running_task_addr) < 0) { + goto cleanup; + } + + _Py_RemoteDebug_ClearCache(&self->handle); + return result; + +cleanup: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +static PyMethodDef RemoteUnwinder_methods[] = { + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF + {NULL, NULL} }; -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "_remote_debugging", - .m_size = -1, - .m_methods = methods, +static void +RemoteUnwinder_dealloc(RemoteUnwinderObject *self) +{ + PyTypeObject *tp = Py_TYPE(self); + if (self->code_object_cache) { + _Py_hashtable_destroy(self->code_object_cache); + } +#ifdef Py_GIL_DISABLED + if (self->tlbc_cache) { + _Py_hashtable_destroy(self->tlbc_cache); + } +#endif + if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); + _Py_RemoteDebug_CleanupProcHandle(&self->handle); + } + PyObject_Del(self); + Py_DECREF(tp); +} + +static PyType_Slot RemoteUnwinder_slots[] = { + {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, + {Py_tp_methods, RemoteUnwinder_methods}, + {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, + {Py_tp_dealloc, RemoteUnwinder_dealloc}, + {0, NULL} }; -PyMODINIT_FUNC -PyInit__remote_debugging(void) +static PyType_Spec RemoteUnwinder_spec = { + .name = "_remote_debugging.RemoteUnwinder", + .basicsize = sizeof(RemoteUnwinderObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = RemoteUnwinder_slots, +}; + +/* ============================================================================ + * MODULE INITIALIZATION + * ============================================================================ */ + +static int +_remote_debugging_exec(PyObject *m) { - PyObject* mod = PyModule_Create(&module); - if (mod == NULL) { - return NULL; + RemoteDebuggingState *st = RemoteDebugging_GetState(m); +#define CREATE_TYPE(mod, type, spec) \ + do { \ + type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ + if (type == NULL) { \ + return -1; \ + } \ + } while (0) + + CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); + + if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { + return -1; } #ifdef Py_GIL_DISABLED - PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED); + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif - int rc = PyModule_AddIntConstant( - mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); + int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); if (rc < 0) { - Py_DECREF(mod); - return NULL; + return -1; } - return mod; + if (RemoteDebugging_InitState(st) < 0) { + return -1; + } + return 0; } + +static int +remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_VISIT(state->RemoteDebugging_Type); + return 0; +} + +static int +remote_debugging_clear(PyObject *mod) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_CLEAR(state->RemoteDebugging_Type); + return 0; +} + +static void +remote_debugging_free(void *mod) +{ + (void)remote_debugging_clear((PyObject *)mod); +} + +static PyModuleDef_Slot remote_debugging_slots[] = { + {Py_mod_exec, _remote_debugging_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static PyMethodDef remote_debugging_methods[] = { + {NULL, NULL, 0, NULL}, +}; + +static struct PyModuleDef remote_debugging_module = { + PyModuleDef_HEAD_INIT, + .m_name = "_remote_debugging", + .m_size = sizeof(RemoteDebuggingState), + .m_methods = remote_debugging_methods, + .m_slots = remote_debugging_slots, + .m_traverse = remote_debugging_traverse, + .m_clear = remote_debugging_clear, + .m_free = remote_debugging_free, +}; + +PyMODINIT_FUNC +PyInit__remote_debugging(void) +{ + return PyModuleDef_Init(&remote_debugging_module); +} + diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h new file mode 100644 index 00000000000000..e83e2fd7fd2b5b --- /dev/null +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -0,0 +1,243 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, +"RemoteUnwinder(pid, *, all_threads=False)\n" +"--\n" +"\n" +"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to debug\n" +" all_threads: If True, initialize state for all threads in the process.\n" +" If False, only initialize for the main thread.\n" +"\n" +"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" +"process, including examining thread states, stack frames and other runtime data.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process"); + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads); + +static int +_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_threads", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "RemoteUnwinder", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int all_threads = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__, +"get_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns a list of stack traces for all threads in the target process.\n" +"\n" +"Each element in the returned list is a tuple of (thread_id, frame_list), where:\n" +"- thread_id is the OS thread identifier\n" +"- frame_list is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for that thread, ordered from most recent to oldest\n" +"\n" +"Example:\n" +" [\n" +" (1234, [\n" +" (\'process_data\', \'worker.py\', 127),\n" +" (\'run_worker\', \'worker.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ]),\n" +" (1235, [\n" +" (\'handle_request\', \'server.py\', 89),\n" +" (\'serve_forever\', \'server.py\', 52)\n" +" ])\n" +" ]\n" +"\n" +"Raises:\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF \ + {"get_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__, +"get_all_awaited_by($self, /)\n" +"--\n" +"\n" +"Get all tasks and their awaited_by relationships from the remote process.\n" +"\n" +"This provides a tree structure showing which tasks are waiting for other tasks.\n" +"\n" +"For each task, returns:\n" +"1. The call stack frames leading to where the task is currently executing\n" +"2. The name of the task\n" +"3. A list of tasks that this task is waiting for, with their own frames/names/etc\n" +"\n" +"Returns a list of [frames, task_name, subtasks] where:\n" +"- frames: List of (func_name, filename, lineno) showing the call stack\n" +"- task_name: String identifier for the task\n" +"- subtasks: List of tasks being awaited by this task, in same format\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the remote process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output:\n" +"[\n" +" [\n" +" [(\"c5\", \"script.py\", 10), (\"c4\", \"script.py\", 14)],\n" +" \"c2_root\",\n" +" [\n" +" [\n" +" [(\"c1\", \"script.py\", 23)],\n" +" \"sub_main_2\",\n" +" [...]\n" +" ],\n" +" [...]\n" +" ]\n" +" ]\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF \ + {"get_all_awaited_by", (PyCFunction)_remote_debugging_RemoteUnwinder_get_all_awaited_by, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, +"get_async_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns information about the currently running async task and its stack trace.\n" +"\n" +"Returns a tuple of (task_info, stack_frames) where:\n" +"- task_info is a tuple of (task_id, task_name) identifying the task\n" +"- stack_frames is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for the task, ordered from most recent to oldest\n" +"\n" +"Example:\n" +" ((4345585712, \'Task-1\'), [\n" +" (\'run_echo_server\', \'server.py\', 127),\n" +" (\'serve_forever\', \'server.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ])\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the target process\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ + {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=654772085f1f4bf6 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 4f06a36a130207..ee869d991d93cd 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2364,6 +2364,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); diff --git a/Python/index_pool.c b/Python/index_pool.c index 007c81a0fc16ec..520a65938ec6c7 100644 --- a/Python/index_pool.c +++ b/Python/index_pool.c @@ -172,6 +172,9 @@ _PyIndexPool_AllocIndex(_PyIndexPool *pool) else { index = heap_pop(free_indices); } + + pool->tlbc_generation++; + UNLOCK_POOL(pool); return index; } @@ -180,6 +183,7 @@ void _PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index) { LOCK_POOL(pool); + pool->tlbc_generation++; heap_add(&pool->free_indices, index); UNLOCK_POOL(pool); } diff --git a/Python/pystate.c b/Python/pystate.c index 4144e6edefc073..0544b15aad1cc8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -567,6 +567,7 @@ init_interpreter(PyInterpreterState *interp, } interp->sys_profile_initialized = false; interp->sys_trace_initialized = false; + interp->_code_object_generation = 0; interp->jit = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; @@ -777,6 +778,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) { Py_CLEAR(interp->monitoring_tool_names[t]); } + interp->_code_object_generation = 0; +#ifdef Py_GIL_DISABLED + interp->tlbc_indices.tlbc_generation = 0; +#endif PyConfig_Clear(&interp->config); _PyCodec_Fini(interp); @@ -1346,9 +1351,6 @@ tstate_is_alive(PyThreadState *tstate) // lifecycle //---------- -/* Minimum size of data stack chunk */ -#define DATA_STACK_CHUNK_SIZE (16*1024) - static _PyStackChunk* allocate_chunk(int size_in_bytes, _PyStackChunk* previous) { @@ -2897,7 +2899,7 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature static PyObject ** push_chunk(PyThreadState *tstate, int size) { - int allocate_size = DATA_STACK_CHUNK_SIZE; + int allocate_size = _PY_DATA_STACK_CHUNK_SIZE; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } diff --git a/Python/remote_debug.h b/Python/remote_debug.h index edc77c302916ca..dbc6bdd09a693f 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -73,19 +73,71 @@ extern "C" { # define HAVE_PROCESS_VM_READV 0 #endif +static inline size_t +get_page_size(void) { + size_t page_size = 0; + if (page_size == 0) { +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + page_size = si.dwPageSize; +#else + page_size = (size_t)getpagesize(); +#endif + } + return page_size; +} + +typedef struct page_cache_entry { + uintptr_t page_addr; // page-aligned base address + char *data; + int valid; + struct page_cache_entry *next; +} page_cache_entry_t; + +#define MAX_PAGES 1024 + // Define a platform-independent process handle structure typedef struct { pid_t pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) + mach_port_t task; +#elif defined(MS_WINDOWS) HANDLE hProcess; #endif + page_cache_entry_t pages[MAX_PAGES]; + Py_ssize_t page_size; } proc_handle_t; +static void +_Py_RemoteDebug_FreePageCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + PyMem_RawFree(handle->pages[i].data); + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } +} + +void +_Py_RemoteDebug_ClearCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].valid = 0; + } +} + +#if defined(__APPLE__) && TARGET_OS_OSX +static mach_port_t pid_to_task(pid_t pid); +#endif + // Initialize the process handle static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) + handle->task = pid_to_task(handle->pid); +#elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, FALSE, pid); @@ -94,6 +146,11 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { return -1; } #endif + handle->page_size = get_page_size(); + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } return 0; } @@ -107,6 +164,7 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { } #endif handle->pid = 0; + _Py_RemoteDebug_FreePageCache(handle); } #if defined(__APPLE__) && TARGET_OS_OSX @@ -755,7 +813,7 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #elif defined(__APPLE__) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), + handle->task, (mach_vm_address_t)remote_address, len, (mach_vm_address_t)dst, @@ -780,6 +838,59 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } +int +_Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, + uintptr_t addr, + size_t size, + void *out) +{ + size_t page_size = handle->page_size; + uintptr_t page_base = addr & ~(page_size - 1); + size_t offset_in_page = addr - page_base; + + if (offset_in_page + size > page_size) { + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); + } + + // Search for valid cached page + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (entry->valid && entry->page_addr == page_base) { + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + + // Find reusable slot + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (!entry->valid) { + if (entry->data == NULL) { + entry->data = PyMem_RawMalloc(page_size); + if (entry->data == NULL) { + PyErr_NoMemory(); + return -1; + } + } + + if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size, entry->data) < 0) { + // Try to just copy the exact ammount as a fallback + PyErr_Clear(); + goto fallback; + } + + entry->page_addr = page_base; + entry->valid = 1; + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + +fallback: + // Cache full — fallback to uncached read + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); +} + static int _Py_RemoteDebug_ReadDebugOffsets( proc_handle_t *handle, From 7b1a70023127666e98a1fc7f7a3ee04f7bdc4028 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 25 May 2025 21:37:15 +0100 Subject: [PATCH 342/989] Heavily comment Python/perf_jit_trampoline.c to improve maintainability (#134527) Signed-off-by: Pablo Galindo --- Python/perf_jit_trampoline.c | 1414 +++++++++++++++++++++++++--------- 1 file changed, 1029 insertions(+), 385 deletions(-) diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 1211e0e9f112b7..5c7cb5b0a9913c 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -1,241 +1,354 @@ +/* + * Python Perf Trampoline Support - JIT Dump Implementation + * + * This file implements the perf jitdump API for Python's performance profiling + * integration. It allows perf (Linux performance analysis tool) to understand + * and profile dynamically generated Python bytecode by creating JIT dump files + * that perf can inject into its analysis. + * + * + * IMPORTANT: This file exports specific callback functions that are part of + * Python's internal API. Do not modify the function signatures or behavior + * of exported functions without coordinating with the Python core team. + * + * Usually the binary and libraries are mapped in separate region like below: + * + * address -> + * --+---------------------+--//--+---------------------+-- + * | .text | .data | ... | | .text | .data | ... | + * --+---------------------+--//--+---------------------+-- + * myprog libc.so + * + * So it'd be easy and straight-forward to find a mapped binary or library from an + * address. + * + * But for JIT code, the code arena only cares about the code section. But the + * resulting DSOs (which is generated by perf inject -j) contain ELF headers and + * unwind info too. Then it'd generate following address space with synthesized + * MMAP events. Let's say it has a sample between address B and C. + * + * sample + * | + * address -> A B v C + * --------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... + * --------------------------------------------------------------------------------------------------- + * + * If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see + * the unwind info. If it maps both .text section and unwind sections, the sample + * could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing + * which one is right. So to make perf happy we have non-overlapping ranges for each + * DSO: + * + * address -> + * ------------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... + * ------------------------------------------------------------------------------------------------------- + * + * As the trampolines are constant, we add a constant padding but in general the padding needs to have the + * size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + */ + + + #include "Python.h" #include "pycore_ceval.h" // _PyPerf_Callbacks #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_runtime.h" // _PyRuntime - #ifdef PY_HAVE_PERF_TRAMPOLINE -#include -#include -#include -#include // mmap() -#include -#include // sysconf() -#include // gettimeofday() -#include - -// ---------------------------------- -// Perf jitdump API -// ---------------------------------- - -typedef struct { - FILE* perf_map; - PyThread_type_lock map_lock; - void* mapped_buffer; - size_t mapped_size; - int code_id; -} PerfMapJitState; - -static PerfMapJitState perf_jit_map_state; +/* Standard library includes for perf jitdump implementation */ +#include // ELF architecture constants +#include // File control operations +#include // Standard I/O operations +#include // Standard library functions +#include // Memory mapping functions (mmap) +#include // System data types +#include // System calls (sysconf, getpid) +#include // Time functions (gettimeofday) +#include // System call interface + +// ============================================================================= +// CONSTANTS AND CONFIGURATION +// ============================================================================= /* -Usually the binary and libraries are mapped in separate region like below: - - address -> - --+---------------------+--//--+---------------------+-- - | .text | .data | ... | | .text | .data | ... | - --+---------------------+--//--+---------------------+-- - myprog libc.so - -So it'd be easy and straight-forward to find a mapped binary or library from an -address. - -But for JIT code, the code arena only cares about the code section. But the -resulting DSOs (which is generated by perf inject -j) contain ELF headers and -unwind info too. Then it'd generate following address space with synthesized -MMAP events. Let's say it has a sample between address B and C. - - sample - | - address -> A B v C - --------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... - --------------------------------------------------------------------------------------------------- - -If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see -the unwind info. If it maps both .text section and unwind sections, the sample -could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing -which one is right. So to make perf happy we have non-overlapping ranges for each -DSO: - - address -> - ------------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... - ------------------------------------------------------------------------------------------------------- - -As the trampolines are constant, we add a constant padding but in general the padding needs to have the -size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + * Memory layout considerations for perf jitdump: + * + * Perf expects non-overlapping memory regions for each JIT-compiled function. + * When perf processes the jitdump file, it creates synthetic DSO (Dynamic + * Shared Object) files that contain: + * - ELF headers + * - .text section (actual machine code) + * - Unwind information (for stack traces) + * + * To ensure proper address space layout, we add padding between code regions. + * This prevents address conflicts when perf maps the synthesized DSOs. + * + * Memory layout example: + * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] + * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding] + * + * The padding size (0x100) is chosen to accommodate typical unwind info sizes + * while maintaining 16-byte alignment requirements. */ - #define PERF_JIT_CODE_PADDING 0x100 -#define trampoline_api _PyRuntime.ceval.perf.trampoline_api - -typedef uint64_t uword; -typedef const char* CodeComments; -#define Pd "d" -#define MB (1024 * 1024) - -#define EM_386 3 -#define EM_X86_64 62 -#define EM_ARM 40 -#define EM_AARCH64 183 -#define EM_RISCV 243 +/* Convenient access to the global trampoline API state */ +#define trampoline_api _PyRuntime.ceval.perf.trampoline_api -#define TARGET_ARCH_IA32 0 -#define TARGET_ARCH_X64 0 -#define TARGET_ARCH_ARM 0 -#define TARGET_ARCH_ARM64 0 -#define TARGET_ARCH_RISCV32 0 -#define TARGET_ARCH_RISCV64 0 +/* Type aliases for clarity and portability */ +typedef uint64_t uword; // Word-sized unsigned integer +typedef const char* CodeComments; // Code comment strings -#define FLAG_generate_perf_jitdump 0 -#define FLAG_write_protect_code 0 -#define FLAG_write_protect_vm_isolate 0 -#define FLAG_code_comments 0 +/* Memory size constants */ +#define MB (1024 * 1024) // 1 Megabyte for buffer sizing -#define UNREACHABLE() +// ============================================================================= +// ARCHITECTURE-SPECIFIC DEFINITIONS +// ============================================================================= -static uword GetElfMachineArchitecture(void) { -#if TARGET_ARCH_IA32 - return EM_386; -#elif TARGET_ARCH_X64 +/* + * Returns the ELF machine architecture constant for the current platform. + * This is required for the jitdump header to correctly identify the target + * architecture for perf processing. + * + */ +static uint64_t GetElfMachineArchitecture(void) { +#if defined(__x86_64__) || defined(_M_X64) return EM_X86_64; -#elif TARGET_ARCH_ARM - return EM_ARM; -#elif TARGET_ARCH_ARM64 +#elif defined(__i386__) || defined(_M_IX86) + return EM_386; +#elif defined(__aarch64__) return EM_AARCH64; -#elif TARGET_ARCH_RISCV32 || TARGET_ARCH_RISCV64 +#elif defined(__arm__) || defined(_M_ARM) + return EM_ARM; +#elif defined(__riscv) return EM_RISCV; #else - UNREACHABLE(); + Py_UNREACHABLE(); // Unsupported architecture - should never reach here return 0; #endif } +// ============================================================================= +// PERF JITDUMP DATA STRUCTURES +// ============================================================================= + +/* + * Perf jitdump file format structures + * + * These structures define the binary format that perf expects for JIT dump files. + * The format is documented in the Linux perf tools source code and must match + * exactly for proper perf integration. + */ + +/* + * Jitdump file header - written once at the beginning of each jitdump file + * Contains metadata about the process and jitdump format version + */ typedef struct { - uint32_t magic; - uint32_t version; - uint32_t size; - uint32_t elf_mach_target; - uint32_t reserved; - uint32_t process_id; - uint64_t time_stamp; - uint64_t flags; + uint32_t magic; // Magic number (0x4A695444 = "JiTD") + uint32_t version; // Jitdump format version (currently 1) + uint32_t size; // Size of this header structure + uint32_t elf_mach_target; // Target architecture (from GetElfMachineArchitecture) + uint32_t reserved; // Reserved field (must be 0) + uint32_t process_id; // Process ID of the JIT compiler + uint64_t time_stamp; // Timestamp when jitdump was created + uint64_t flags; // Feature flags (currently unused) } Header; - enum PerfEvent { - PerfLoad = 0, - PerfMove = 1, - PerfDebugInfo = 2, - PerfClose = 3, - PerfUnwindingInfo = 4 +/* + * Perf event types supported by the jitdump format + * Each event type has a corresponding structure format + */ +enum PerfEvent { + PerfLoad = 0, // Code load event (new JIT function) + PerfMove = 1, // Code move event (function relocated) + PerfDebugInfo = 2, // Debug information event + PerfClose = 3, // JIT session close event + PerfUnwindingInfo = 4 // Stack unwinding information event }; +/* + * Base event structure - common header for all perf events + * Every event in the jitdump file starts with this structure + */ struct BaseEvent { - uint32_t event; - uint32_t size; - uint64_t time_stamp; - }; + uint32_t event; // Event type (from PerfEvent enum) + uint32_t size; // Total size of this event including payload + uint64_t time_stamp; // Timestamp when event occurred +}; +/* + * Code load event - indicates a new JIT-compiled function is available + * This is the most important event type for Python profiling + */ typedef struct { - struct BaseEvent base; - uint32_t process_id; - uint32_t thread_id; - uint64_t vma; - uint64_t code_address; - uint64_t code_size; - uint64_t code_id; + struct BaseEvent base; // Common event header + uint32_t process_id; // Process ID where code was generated + uint32_t thread_id; // Thread ID where code was generated + uint64_t vma; // Virtual memory address where code is loaded + uint64_t code_address; // Address of the actual machine code + uint64_t code_size; // Size of the machine code in bytes + uint64_t code_id; // Unique identifier for this code region + /* Followed by: + * - null-terminated function name string + * - raw machine code bytes + */ } CodeLoadEvent; +/* + * Code unwinding information event - provides DWARF data for stack traces + * Essential for proper stack unwinding during profiling + */ typedef struct { - struct BaseEvent base; - uint64_t unwind_data_size; - uint64_t eh_frame_hdr_size; - uint64_t mapped_size; + struct BaseEvent base; // Common event header + uint64_t unwind_data_size; // Size of the unwinding data + uint64_t eh_frame_hdr_size; // Size of the EH frame header + uint64_t mapped_size; // Total mapped size (with padding) + /* Followed by: + * - EH frame header + * - DWARF unwinding information + * - Padding to alignment boundary + */ } CodeUnwindingInfoEvent; -static const intptr_t nanoseconds_per_second = 1000000000; - -// Dwarf encoding constants +// ============================================================================= +// GLOBAL STATE MANAGEMENT +// ============================================================================= -static const uint8_t DwarfUData4 = 0x03; -static const uint8_t DwarfSData4 = 0x0b; -static const uint8_t DwarfPcRel = 0x10; -static const uint8_t DwarfDataRel = 0x30; -// static uint8_t DwarfOmit = 0xff; +/* + * Global state for the perf jitdump implementation + * + * This structure maintains all the state needed for generating jitdump files. + * It's designed as a singleton since there's typically only one jitdump file + * per Python process. + */ typedef struct { - unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - int32_t eh_frame_ptr; - int32_t eh_fde_count; - int32_t from; - int32_t to; -} EhFrameHeader; + FILE* perf_map; // File handle for the jitdump file + PyThread_type_lock map_lock; // Thread synchronization lock + void* mapped_buffer; // Memory-mapped region (signals perf we're active) + size_t mapped_size; // Size of the mapped region + int code_id; // Counter for unique code region identifiers +} PerfMapJitState; + +/* Global singleton instance */ +static PerfMapJitState perf_jit_map_state; + +// ============================================================================= +// TIME UTILITIES +// ============================================================================= +/* Time conversion constant */ +static const intptr_t nanoseconds_per_second = 1000000000; + +/* + * Get current monotonic time in nanoseconds + * + * Monotonic time is preferred for event timestamps because it's not affected + * by system clock adjustments. This ensures consistent timing relationships + * between events even if the system clock is changed. + * + * Returns: Current monotonic time in nanoseconds since an arbitrary epoch + */ static int64_t get_current_monotonic_ticks(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Should never fail on supported systems return 0; } - // Convert to nanoseconds. + + /* Convert to nanoseconds for maximum precision */ int64_t result = ts.tv_sec; result *= nanoseconds_per_second; result += ts.tv_nsec; return result; } +/* + * Get current wall clock time in microseconds + * + * Used for the jitdump file header timestamp. Unlike monotonic time, + * this represents actual wall clock time that can be correlated with + * other system events. + * + * Returns: Current time in microseconds since Unix epoch + */ static int64_t get_current_time_microseconds(void) { - // gettimeofday has microsecond resolution. - struct timeval tv; - if (gettimeofday(&tv, NULL) < 0) { - UNREACHABLE(); - return 0; - } - return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; + struct timeval tv; + if (gettimeofday(&tv, NULL) < 0) { + Py_UNREACHABLE(); // Should never fail on supported systems + return 0; + } + return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; } +// ============================================================================= +// UTILITY FUNCTIONS +// ============================================================================= +/* + * Round up a value to the next multiple of a given number + * + * This is essential for maintaining proper alignment requirements in the + * jitdump format. Many structures need to be aligned to specific boundaries + * (typically 8 or 16 bytes) for efficient processing by perf. + * + * Args: + * value: The value to round up + * multiple: The multiple to round up to + * + * Returns: The smallest value >= input that is a multiple of 'multiple' + */ static size_t round_up(int64_t value, int64_t multiple) { if (multiple == 0) { - // Avoid division by zero - return value; + return value; // Avoid division by zero } int64_t remainder = value % multiple; if (remainder == 0) { - // Value is already a multiple of 'multiple' - return value; + return value; // Already aligned } - // Calculate the difference to the next multiple + /* Calculate how much to add to reach the next multiple */ int64_t difference = multiple - remainder; - - // Add the difference to the value int64_t rounded_up_value = value + difference; return rounded_up_value; } +// ============================================================================= +// FILE I/O UTILITIES +// ============================================================================= +/* + * Write data to the jitdump file with error handling + * + * This function ensures that all data is written to the file, handling + * partial writes that can occur with large buffers or when the system + * is under load. + * + * Args: + * buffer: Pointer to data to write + * size: Number of bytes to write + */ static void perf_map_jit_write_fully(const void* buffer, size_t size) { FILE* out_file = perf_jit_map_state.perf_map; const char* ptr = (const char*)(buffer); + while (size > 0) { const size_t written = fwrite(ptr, 1, size, out_file); if (written == 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Write failure - should be very rare break; } size -= written; @@ -243,284 +356,720 @@ static void perf_map_jit_write_fully(const void* buffer, size_t size) { } } +/* + * Write the jitdump file header + * + * The header must be written exactly once at the beginning of each jitdump + * file. It provides metadata that perf uses to parse the rest of the file. + * + * Args: + * pid: Process ID to include in the header + * out_file: File handle to write to (currently unused, uses global state) + */ static void perf_map_jit_write_header(int pid, FILE* out_file) { Header header; - header.magic = 0x4A695444; - header.version = 1; - header.size = sizeof(Header); - header.elf_mach_target = GetElfMachineArchitecture(); - header.process_id = pid; - header.time_stamp = get_current_time_microseconds(); - header.flags = 0; - perf_map_jit_write_fully(&header, sizeof(header)); -} -static void* perf_map_jit_init(void) { - char filename[100]; - int pid = getpid(); - snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); - const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); - if (fd == -1) { - return NULL; - } + /* Initialize header with required values */ + header.magic = 0x4A695444; // "JiTD" magic number + header.version = 1; // Current jitdump version + header.size = sizeof(Header); // Header size for validation + header.elf_mach_target = GetElfMachineArchitecture(); // Target architecture + header.process_id = pid; // Process identifier + header.time_stamp = get_current_time_microseconds(); // Creation time + header.flags = 0; // No special flags currently used - const long page_size = sysconf(_SC_PAGESIZE); // NOLINT(runtime/int) - if (page_size == -1) { - close(fd); - return NULL; - } - - // The perf jit interface forces us to map the first page of the file - // to signal that we are using the interface. - perf_jit_map_state.mapped_buffer = mmap(NULL, page_size, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); - if (perf_jit_map_state.mapped_buffer == NULL) { - close(fd); - return NULL; - } - perf_jit_map_state.mapped_size = page_size; - perf_jit_map_state.perf_map = fdopen(fd, "w+"); - if (perf_jit_map_state.perf_map == NULL) { - close(fd); - return NULL; - } - setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); - perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); - - perf_jit_map_state.map_lock = PyThread_allocate_lock(); - if (perf_jit_map_state.map_lock == NULL) { - fclose(perf_jit_map_state.perf_map); - return NULL; - } - perf_jit_map_state.code_id = 0; - - trampoline_api.code_padding = PERF_JIT_CODE_PADDING; - return &perf_jit_map_state; + perf_map_jit_write_fully(&header, sizeof(header)); } -/* DWARF definitions. */ +// ============================================================================= +// DWARF CONSTANTS AND UTILITIES +// ============================================================================= + +/* + * DWARF (Debug With Arbitrary Record Formats) constants + * + * DWARF is a debugging data format used to provide stack unwinding information. + * These constants define the various encoding types and opcodes used in + * DWARF Call Frame Information (CFI) records. + */ +/* DWARF Call Frame Information version */ #define DWRF_CIE_VERSION 1 +/* DWARF CFA (Call Frame Address) opcodes */ enum { - DWRF_CFA_nop = 0x0, - DWRF_CFA_offset_extended = 0x5, - DWRF_CFA_def_cfa = 0xc, - DWRF_CFA_def_cfa_offset = 0xe, - DWRF_CFA_offset_extended_sf = 0x11, - DWRF_CFA_advance_loc = 0x40, - DWRF_CFA_offset = 0x80 + DWRF_CFA_nop = 0x0, // No operation + DWRF_CFA_offset_extended = 0x5, // Extended offset instruction + DWRF_CFA_def_cfa = 0xc, // Define CFA rule + DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset + DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset + DWRF_CFA_advance_loc = 0x40, // Advance location counter + DWRF_CFA_offset = 0x80 // Simple offset instruction }; -enum - { - DWRF_EH_PE_absptr = 0x00, - DWRF_EH_PE_omit = 0xff, - - /* FDE data encoding. */ - DWRF_EH_PE_uleb128 = 0x01, - DWRF_EH_PE_udata2 = 0x02, - DWRF_EH_PE_udata4 = 0x03, - DWRF_EH_PE_udata8 = 0x04, - DWRF_EH_PE_sleb128 = 0x09, - DWRF_EH_PE_sdata2 = 0x0a, - DWRF_EH_PE_sdata4 = 0x0b, - DWRF_EH_PE_sdata8 = 0x0c, - DWRF_EH_PE_signed = 0x08, - - /* FDE flags. */ - DWRF_EH_PE_pcrel = 0x10, - DWRF_EH_PE_textrel = 0x20, - DWRF_EH_PE_datarel = 0x30, - DWRF_EH_PE_funcrel = 0x40, - DWRF_EH_PE_aligned = 0x50, - - DWRF_EH_PE_indirect = 0x80 - }; +/* DWARF Exception Handling pointer encodings */ +enum { + DWRF_EH_PE_absptr = 0x00, // Absolute pointer + DWRF_EH_PE_omit = 0xff, // Omitted value + + /* Data type encodings */ + DWRF_EH_PE_uleb128 = 0x01, // Unsigned LEB128 + DWRF_EH_PE_udata2 = 0x02, // Unsigned 2-byte + DWRF_EH_PE_udata4 = 0x03, // Unsigned 4-byte + DWRF_EH_PE_udata8 = 0x04, // Unsigned 8-byte + DWRF_EH_PE_sleb128 = 0x09, // Signed LEB128 + DWRF_EH_PE_sdata2 = 0x0a, // Signed 2-byte + DWRF_EH_PE_sdata4 = 0x0b, // Signed 4-byte + DWRF_EH_PE_sdata8 = 0x0c, // Signed 8-byte + DWRF_EH_PE_signed = 0x08, // Signed flag + + /* Reference type encodings */ + DWRF_EH_PE_pcrel = 0x10, // PC-relative + DWRF_EH_PE_textrel = 0x20, // Text-relative + DWRF_EH_PE_datarel = 0x30, // Data-relative + DWRF_EH_PE_funcrel = 0x40, // Function-relative + DWRF_EH_PE_aligned = 0x50, // Aligned + DWRF_EH_PE_indirect = 0x80 // Indirect +}; +/* Additional DWARF constants for debug information */ enum { DWRF_TAG_compile_unit = 0x11 }; - enum { DWRF_children_no = 0, DWRF_children_yes = 1 }; +enum { + DWRF_AT_name = 0x03, // Name attribute + DWRF_AT_stmt_list = 0x10, // Statement list + DWRF_AT_low_pc = 0x11, // Low PC address + DWRF_AT_high_pc = 0x12 // High PC address +}; +enum { + DWRF_FORM_addr = 0x01, // Address form + DWRF_FORM_data4 = 0x06, // 4-byte data + DWRF_FORM_string = 0x08 // String form +}; -enum { DWRF_AT_name = 0x03, DWRF_AT_stmt_list = 0x10, DWRF_AT_low_pc = 0x11, DWRF_AT_high_pc = 0x12 }; - -enum { DWRF_FORM_addr = 0x01, DWRF_FORM_data4 = 0x06, DWRF_FORM_string = 0x08 }; - -enum { DWRF_LNS_extended_op = 0, DWRF_LNS_copy = 1, DWRF_LNS_advance_pc = 2, DWRF_LNS_advance_line = 3 }; +/* Line number program opcodes */ +enum { + DWRF_LNS_extended_op = 0, // Extended opcode + DWRF_LNS_copy = 1, // Copy operation + DWRF_LNS_advance_pc = 2, // Advance program counter + DWRF_LNS_advance_line = 3 // Advance line number +}; -enum { DWRF_LNE_end_sequence = 1, DWRF_LNE_set_address = 2 }; +/* Line number extended opcodes */ +enum { + DWRF_LNE_end_sequence = 1, // End of sequence + DWRF_LNE_set_address = 2 // Set address +}; +/* + * Architecture-specific DWARF register numbers + * + * These constants define the register numbering scheme used by DWARF + * for each supported architecture. The numbers must match the ABI + * specification for proper stack unwinding. + */ enum { #ifdef __x86_64__ - /* Yes, the order is strange, but correct. */ - DWRF_REG_AX, - DWRF_REG_DX, - DWRF_REG_CX, - DWRF_REG_BX, - DWRF_REG_SI, - DWRF_REG_DI, - DWRF_REG_BP, - DWRF_REG_SP, - DWRF_REG_8, - DWRF_REG_9, - DWRF_REG_10, - DWRF_REG_11, - DWRF_REG_12, - DWRF_REG_13, - DWRF_REG_14, - DWRF_REG_15, - DWRF_REG_RA, + /* x86_64 register numbering (note: order is defined by x86_64 ABI) */ + DWRF_REG_AX, // RAX + DWRF_REG_DX, // RDX + DWRF_REG_CX, // RCX + DWRF_REG_BX, // RBX + DWRF_REG_SI, // RSI + DWRF_REG_DI, // RDI + DWRF_REG_BP, // RBP + DWRF_REG_SP, // RSP + DWRF_REG_8, // R8 + DWRF_REG_9, // R9 + DWRF_REG_10, // R10 + DWRF_REG_11, // R11 + DWRF_REG_12, // R12 + DWRF_REG_13, // R13 + DWRF_REG_14, // R14 + DWRF_REG_15, // R15 + DWRF_REG_RA, // Return address (RIP) #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_REG_SP = 31, - DWRF_REG_RA = 30, + /* AArch64 register numbering */ + DWRF_REG_FP = 29, // Frame Pointer + DWRF_REG_RA = 30, // Link register (return address) + DWRF_REG_SP = 31, // Stack pointer #else # error "Unsupported target architecture" #endif }; -typedef struct ELFObjectContext -{ - uint8_t* p; /* Pointer to next address in obj.space. */ - uint8_t* startp; /* Pointer to start address in obj.space. */ - uint8_t* eh_frame_p; /* Pointer to start address in obj.space. */ - uint32_t code_size; /* Size of machine code. */ +/* DWARF encoding constants used in EH frame headers */ +static const uint8_t DwarfUData4 = 0x03; // Unsigned 4-byte data +static const uint8_t DwarfSData4 = 0x0b; // Signed 4-byte data +static const uint8_t DwarfPcRel = 0x10; // PC-relative encoding +static const uint8_t DwarfDataRel = 0x30; // Data-relative encoding + +// ============================================================================= +// ELF OBJECT CONTEXT +// ============================================================================= + +/* + * Context for building ELF/DWARF structures + * + * This structure maintains state while constructing DWARF unwind information. + * It acts as a simple buffer manager with pointers to track current position + * and important landmarks within the buffer. + */ +typedef struct ELFObjectContext { + uint8_t* p; // Current write position in buffer + uint8_t* startp; // Start of buffer (for offset calculations) + uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets) + uint32_t code_size; // Size of the code being described } ELFObjectContext; -/* Append a null-terminated string. */ -static uint32_t -elfctx_append_string(ELFObjectContext* ctx, const char* str) -{ +/* + * EH Frame Header structure for DWARF unwinding + * + * This structure provides metadata about the DWARF unwinding information + * that follows. It's required by the perf jitdump format to enable proper + * stack unwinding during profiling. + */ +typedef struct { + unsigned char version; // EH frame version (always 1) + unsigned char eh_frame_ptr_enc; // Encoding of EH frame pointer + unsigned char fde_count_enc; // Encoding of FDE count + unsigned char table_enc; // Encoding of table entries + int32_t eh_frame_ptr; // Pointer to EH frame data + int32_t eh_fde_count; // Number of FDEs (Frame Description Entries) + int32_t from; // Start address of code range + int32_t to; // End address of code range +} EhFrameHeader; + +// ============================================================================= +// DWARF GENERATION UTILITIES +// ============================================================================= + +/* + * Append a null-terminated string to the ELF context buffer + * + * Args: + * ctx: ELF object context + * str: String to append (must be null-terminated) + * + * Returns: Offset from start of buffer where string was written + */ +static uint32_t elfctx_append_string(ELFObjectContext* ctx, const char* str) { uint8_t* p = ctx->p; uint32_t ofs = (uint32_t)(p - ctx->startp); + + /* Copy string including null terminator */ do { *p++ = (uint8_t)*str; } while (*str++); + ctx->p = p; return ofs; } -/* Append a SLEB128 value. */ -static void -elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) -{ +/* + * Append a SLEB128 (Signed Little Endian Base 128) value + * + * SLEB128 is a variable-length encoding used extensively in DWARF. + * It efficiently encodes small numbers in fewer bytes. + * + * Args: + * ctx: ELF object context + * v: Signed value to encode + */ +static void elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) { - *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (uint8_t)(v & 0x7f); + *p++ = (uint8_t)(v & 0x7f); // Final byte without continuation bit + ctx->p = p; } -/* Append a ULEB128 to buffer. */ -static void -elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) -{ +/* + * Append a ULEB128 (Unsigned Little Endian Base 128) value + * + * Similar to SLEB128 but for unsigned values. + * + * Args: + * ctx: ELF object context + * v: Unsigned value to encode + */ +static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; v >= 0x80; v >>= 7) { - *p++ = (char)((v & 0x7f) | 0x80); + *p++ = (char)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (char)v; + *p++ = (char)v; // Final byte without continuation bit + ctx->p = p; } -/* Shortcuts to generate DWARF structures. */ -#define DWRF_U8(x) (*p++ = (x)) -#define DWRF_I8(x) (*(int8_t*)p = (x), p++) -#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) -#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) -#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) -#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) -#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) -#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) -#define DWRF_ALIGNNOP(s) \ - while ((uintptr_t)p & ((s)-1)) { \ - *p++ = DWRF_CFA_nop; \ +/* + * Macros for generating DWARF structures + * + * These macros provide a convenient way to write various data types + * to the DWARF buffer while automatically advancing the pointer. + */ +#define DWRF_U8(x) (*p++ = (x)) // Write unsigned 8-bit +#define DWRF_I8(x) (*(int8_t*)p = (x), p++) // Write signed 8-bit +#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) // Write unsigned 16-bit +#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) // Write unsigned 32-bit +#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) // Write address +#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) // Write ULEB128 +#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) // Write SLEB128 +#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) // Write string + +/* Align to specified boundary with NOP instructions */ +#define DWRF_ALIGNNOP(s) \ + while ((uintptr_t)p & ((s)-1)) { \ + *p++ = DWRF_CFA_nop; \ } -#define DWRF_SECTION(name, stmt) \ - { \ - uint32_t* szp_##name = (uint32_t*)p; \ - p += 4; \ - stmt; \ - *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ + +/* Write a DWARF section with automatic size calculation */ +#define DWRF_SECTION(name, stmt) \ + { \ + uint32_t* szp_##name = (uint32_t*)p; \ + p += 4; \ + stmt; \ + *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ } -/* Initialize .eh_frame section. */ -static void -elf_init_ehframe(ELFObjectContext* ctx) -{ +// ============================================================================= +// DWARF EH FRAME GENERATION +// ============================================================================= + +/* + * Initialize DWARF .eh_frame section for a code region + * + * The .eh_frame section contains Call Frame Information (CFI) that describes + * how to unwind the stack at any point in the code. This is essential for + * proper profiling as it allows perf to generate accurate call graphs. + * + * The function generates two main components: + * 1. CIE (Common Information Entry) - describes calling conventions + * 2. FDE (Frame Description Entry) - describes specific function unwinding + * + * Args: + * ctx: ELF object context containing code size and buffer pointers + */ +static void elf_init_ehframe(ELFObjectContext* ctx) { uint8_t* p = ctx->p; - uint8_t* framep = p; - - /* Emit DWARF EH CIE. */ - DWRF_SECTION(CIE, DWRF_U32(0); /* Offset to CIE itself. */ - DWRF_U8(DWRF_CIE_VERSION); - DWRF_STR("zR"); /* Augmentation. */ - DWRF_UV(1); /* Code alignment factor. */ - DWRF_SV(-(int64_t)sizeof(uintptr_t)); /* Data alignment factor. */ - DWRF_U8(DWRF_REG_RA); /* Return address register. */ - DWRF_UV(1); - DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); /* Augmentation data. */ - DWRF_U8(DWRF_CFA_def_cfa); DWRF_UV(DWRF_REG_SP); DWRF_UV(sizeof(uintptr_t)); - DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); DWRF_UV(1); - DWRF_ALIGNNOP(sizeof(uintptr_t)); + uint8_t* framep = p; // Remember start of frame data + + /* + * DWARF Unwind Table for Trampoline Function + * + * This section defines DWARF Call Frame Information (CFI) using encoded macros + * like `DWRF_U8`, `DWRF_UV`, and `DWRF_SECTION` to describe how the trampoline function + * preserves and restores registers. This is used by profiling tools (e.g., `perf`) + * and debuggers for stack unwinding in JIT-compiled code. + * + * ------------------------------------------------- + * TO REGENERATE THIS TABLE FROM GCC OBJECTS: + * ------------------------------------------------- + * + * 1. Create a trampoline source file (e.g., `trampoline.c`): + * + * #include + * typedef PyObject* (*py_evaluator)(void*, void*, int); + * PyObject* trampoline(void *ts, void *f, int throwflag, py_evaluator evaluator) { + * return evaluator(ts, f, throwflag); + * } + * + * 2. Compile to an object file with frame pointer preservation: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * 3. Extract DWARF unwind info from the object file: + * + * readelf -w trampoline.o + * + * Example output from `.eh_frame`: + * + * 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * 00000014 FDE cie=00000000 pc=0..14 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * -- These values can be verified by comparing with `readelf -w` or `llvm-dwarfdump --eh-frame`. + * + * ---------------------------------- + * HOW TO TRANSLATE TO DWRF_* MACROS: + * ---------------------------------- + * + * After compiling your trampoline with: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * run: + * + * readelf -w trampoline.o + * + * to inspect the generated `.eh_frame` data. You will see two main components: + * + * 1. A CIE (Common Information Entry): shared configuration used by all FDEs. + * 2. An FDE (Frame Description Entry): function-specific unwind instructions. + * + * --------------------- + * Translating the CIE: + * --------------------- + * From `readelf -w`, you might see: + * + * 00000000 0000000000000010 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * Augmentation data: 1b + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * Map this to: + * + * DWRF_SECTION(CIE, + * DWRF_U32(0); // CIE ID (always 0 for CIEs) + * DWRF_U8(DWRF_CIE_VERSION); // Version: 1 + * DWRF_STR("zR"); // Augmentation string "zR" + * DWRF_UV(4); // Code alignment factor = 4 + * DWRF_SV(-8); // Data alignment factor = -8 + * DWRF_U8(DWRF_REG_RA); // Return address register (e.g., x30 = 30) + * DWRF_UV(1); // Augmentation data length = 1 + * DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // Encoding for FDE pointers + * + * DWRF_U8(DWRF_CFA_def_cfa); // DW_CFA_def_cfa + * DWRF_UV(DWRF_REG_SP); // Register: SP (r31) + * DWRF_UV(0); // Offset = 0 + * + * DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer size boundary + * ) + * + * Notes: + * - Use `DWRF_UV` for unsigned LEB128, `DWRF_SV` for signed LEB128. + * - `DWRF_REG_RA` and `DWRF_REG_SP` are architecture-defined constants. + * + * --------------------- + * Translating the FDE: + * --------------------- + * From `readelf -w`: + * + * 00000014 0000000000000020 00000018 FDE cie=00000000 pc=0000000000000000..0000000000000014 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * Map the FDE header and instructions to: + * + * DWRF_SECTION(FDE, + * DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here) + * DWRF_U32(-0x30); // Initial PC-relative location of the code + * DWRF_U32(ctx->code_size); // Code range covered by this FDE + * DWRF_U8(0); // Augmentation data length (none) + * + * DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 unit (1 * 4 = 4 bytes) + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 + * DWRF_UV(16); + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Save x29 (frame pointer) + * DWRF_UV(2); // At offset 2 * 8 = 16 bytes + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Save x30 (return address) + * DWRF_UV(1); // At offset 1 * 8 = 8 bytes + * + * DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance location by 3 units (3 * 4 = 12 bytes) + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore x30 + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore x29 + * + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + * DWRF_UV(0); + * ) + * + * To regenerate: + * 1. Get the `code alignment factor`, `data alignment factor`, and `RA column` from the CIE. + * 2. Note the range of the function from the FDE's `pc=...` line and map it to the JIT code as + * the code is in a different address space every time. + * 3. For each `DW_CFA_*` entry, use the corresponding `DWRF_*` macro: + * - `DW_CFA_def_cfa_offset` → DWRF_U8(DWRF_CFA_def_cfa_offset), DWRF_UV(value) + * - `DW_CFA_offset: rX` → DWRF_U8(DWRF_CFA_offset | reg), DWRF_UV(offset) + * - `DW_CFA_restore: rX` → DWRF_U8(DWRF_CFA_offset | reg) // restore is same as reusing offset + * - `DW_CFA_advance_loc: N` → DWRF_U8(DWRF_CFA_advance_loc | (N / code_alignment_factor)) + * 4. Use `DWRF_REG_FP`, `DWRF_REG_RA`, etc., for register numbers. + * 5. Use `sizeof(uintptr_t)` (typically 8) for pointer size calculations and alignment. + */ + + /* + * Emit DWARF EH CIE (Common Information Entry) + * + * The CIE describes the calling conventions and basic unwinding rules + * that apply to all functions in this compilation unit. + */ + DWRF_SECTION(CIE, + DWRF_U32(0); // CIE ID (0 indicates this is a CIE) + DWRF_U8(DWRF_CIE_VERSION); // CIE version (1) + DWRF_STR("zR"); // Augmentation string ("zR" = has LSDA) + DWRF_UV(1); // Code alignment factor + DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative) + DWRF_U8(DWRF_REG_RA); // Return address register number + DWRF_UV(1); // Augmentation data length + DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // FDE pointer encoding + + /* Initial CFI instructions - describe default calling convention */ + DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) + DWRF_UV(DWRF_REG_SP); // CFA = SP register + DWRF_UV(sizeof(uintptr_t)); // CFA = SP + pointer_size + DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved + DWRF_UV(1); // At offset 1 from CFA + + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary ) - ctx->eh_frame_p = p; - - /* Emit DWARF EH FDE. */ - DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); /* Offset to CIE. */ - DWRF_U32(-0x30); /* Machine code offset relative to .text. */ - DWRF_U32(ctx->code_size); /* Machine code length. */ - DWRF_U8(0); /* Augmentation data. */ - /* Registers saved in CFRAME. */ + ctx->eh_frame_p = p; // Remember start of FDE data + + /* + * Emit DWARF EH FDE (Frame Description Entry) + * + * The FDE describes unwinding information specific to this function. + * It references the CIE and provides function-specific CFI instructions. + */ + DWRF_SECTION(FDE, + DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) + DWRF_U32(-0x30); // Machine code offset relative to .text + DWRF_U32(ctx->code_size); // Address range covered by this FDE (code lenght) + DWRF_U8(0); // Augmentation data length (none) + + /* + * Architecture-specific CFI instructions + * + * These instructions describe how registers are saved and restored + * during function calls. Each architecture has different calling + * conventions and register usage patterns. + */ #ifdef __x86_64__ - DWRF_U8(DWRF_CFA_advance_loc | 4); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_advance_loc | 6); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(8); - /* Extra registers saved for JIT-compiled code. */ + /* x86_64 calling convention unwinding rules */ + DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(16); // New offset: SP + 16 + DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(8); // New offset: SP + 8 #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_U8(DWRF_CFA_advance_loc | 1); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_offset | 29); DWRF_UV(2); - DWRF_U8(DWRF_CFA_offset | 30); DWRF_UV(1); - DWRF_U8(DWRF_CFA_advance_loc | 3); - DWRF_U8(DWRF_CFA_offset | -(64 - 29)); - DWRF_U8(DWRF_CFA_offset | -(64 - 30)); - DWRF_U8(DWRF_CFA_def_cfa_offset); - DWRF_UV(0); + /* AArch64 calling convention unwinding rules */ + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30) + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(16); // CFA = SP + 16 (stack pointer after push) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Frame pointer (x29) saved + DWRF_UV(2); // At offset 2 from CFA (2 * 8 = 16 bytes) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Link register (x30) saved + DWRF_UV(1); // At offset 1 from CFA (1 * 8 = 8 bytes) + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (mov x16, x3; mov x29, sp; ldp...) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore frame pointer (x29) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore link register (x30) + DWRF_U8(DWRF_CFA_def_cfa_offset); // Final CFA adjustment + DWRF_UV(0); // CFA = SP + 0 (stack restored) + #else # error "Unsupported target architecture" #endif - DWRF_ALIGNNOP(sizeof(uintptr_t));) - ctx->p = p; + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary + ) + + ctx->p = p; // Update context pointer to end of generated data +} + +// ============================================================================= +// JITDUMP INITIALIZATION +// ============================================================================= + +/* + * Initialize the perf jitdump interface + * + * This function sets up everything needed to generate jitdump files: + * 1. Creates the jitdump file with a unique name + * 2. Maps the first page to signal perf that we're using the interface + * 3. Writes the jitdump header + * 4. Initializes synchronization primitives + * + * The memory mapping is crucial - perf detects jitdump files by scanning + * for processes that have mapped files matching the pattern /tmp/jit-*.dump + * + * Returns: Pointer to initialized state, or NULL on failure + */ +static void* perf_map_jit_init(void) { + char filename[100]; + int pid = getpid(); + + /* Create unique filename based on process ID */ + snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); + + /* Create/open the jitdump file with appropriate permissions */ + const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (fd == -1) { + return NULL; // Failed to create file + } + + /* Get system page size for memory mapping */ + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size == -1) { + close(fd); + return NULL; // Failed to get page size + } + + /* + * Map the first page of the jitdump file + * + * This memory mapping serves as a signal to perf that this process + * is generating JIT code. Perf scans /proc/.../maps looking for mapped + * files that match the jitdump naming pattern. + * + * The mapping must be PROT_READ | PROT_EXEC to be detected by perf. + */ + perf_jit_map_state.mapped_buffer = mmap( + NULL, // Let kernel choose address + page_size, // Map one page + PROT_READ | PROT_EXEC, // Read and execute permissions (required by perf) + MAP_PRIVATE, // Private mapping + fd, // File descriptor + 0 // Offset 0 (first page) + ); + + if (perf_jit_map_state.mapped_buffer == NULL) { + close(fd); + return NULL; // Memory mapping failed + } + + perf_jit_map_state.mapped_size = page_size; + + /* Convert file descriptor to FILE* for easier I/O operations */ + perf_jit_map_state.perf_map = fdopen(fd, "w+"); + if (perf_jit_map_state.perf_map == NULL) { + close(fd); + return NULL; // Failed to create FILE* + } + + /* + * Set up file buffering for better performance + * + * We use a large buffer (2MB) because jitdump files can be written + * frequently during program execution. Buffering reduces system call + * overhead and improves overall performance. + */ + setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); + + /* Write the jitdump file header */ + perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); + + /* + * Initialize thread synchronization lock + * + * Multiple threads may attempt to write to the jitdump file + * simultaneously. This lock ensures thread-safe access to the + * global jitdump state. + */ + perf_jit_map_state.map_lock = PyThread_allocate_lock(); + if (perf_jit_map_state.map_lock == NULL) { + fclose(perf_jit_map_state.perf_map); + return NULL; // Failed to create lock + } + + /* Initialize code ID counter */ + perf_jit_map_state.code_id = 0; + + /* Configure trampoline API with padding information */ + trampoline_api.code_padding = PERF_JIT_CODE_PADDING; + + return &perf_jit_map_state; } +// ============================================================================= +// MAIN JITDUMP ENTRY WRITING +// ============================================================================= + +/* + * Write a complete jitdump entry for a Python function + * + * This is the main function called by Python's trampoline system whenever + * a new piece of JIT-compiled code needs to be recorded. It writes both + * the unwinding information and the code load event to the jitdump file. + * + * The function performs these steps: + * 1. Initialize jitdump system if not already done + * 2. Extract function name and filename from Python code object + * 3. Generate DWARF unwinding information + * 4. Write unwinding info event to jitdump file + * 5. Write code load event to jitdump file + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * code_addr: Address where the compiled code resides + * code_size: Size of the compiled code in bytes + * co: Python code object containing metadata + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static void perf_map_jit_write_entry(void *state, const void *code_addr, - unsigned int code_size, PyCodeObject *co) + unsigned int code_size, PyCodeObject *co) { - + /* Initialize jitdump system on first use */ if (perf_jit_map_state.perf_map == NULL) { void* ret = perf_map_jit_init(); if(ret == NULL){ - return; + return; // Initialization failed, silently abort } } + /* + * Extract function information from Python code object + * + * We create a human-readable function name by combining the qualified + * name (includes class/module context) with the filename. This helps + * developers identify functions in perf reports. + */ const char *entry = ""; if (co->co_qualname != NULL) { entry = PyUnicode_AsUTF8(co->co_qualname); } + const char *filename = ""; if (co->co_filename != NULL) { filename = PyUnicode_AsUTF8(co->co_filename); } - + /* + * Create formatted function name for perf display + * + * Format: "py:::" + * The "py::" prefix helps identify Python functions in mixed-language + * profiles (e.g., when profiling C extensions alongside Python code). + */ size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); if (perf_map_entry == NULL) { - return; + return; // Memory allocation failed } snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); @@ -528,90 +1077,185 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, uword base = (uword)code_addr; uword size = code_size; - // Write the code unwinding info event. - - // Create unwinding information (eh frame) + /* + * Generate DWARF unwinding information + * + * DWARF data is essential for proper stack unwinding during profiling. + * Without it, perf cannot generate accurate call graphs, especially + * in optimized code where frame pointers may be omitted. + */ ELFObjectContext ctx; - char buffer[1024]; + char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) ctx.code_size = code_size; ctx.startp = ctx.p = (uint8_t*)buffer; + + /* Generate EH frame (Exception Handling frame) data */ elf_init_ehframe(&ctx); int eh_frame_size = ctx.p - ctx.startp; - // Populate the unwind info event for perf + /* + * Write Code Unwinding Information Event + * + * This event must be written before the code load event to ensure + * perf has the unwinding information available when it processes + * the code region. + */ CodeUnwindingInfoEvent ev2; ev2.base.event = PerfUnwindingInfo; ev2.base.time_stamp = get_current_monotonic_ticks(); ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; - // Ensure we have enough space between DSOs when perf maps them + + /* Verify we don't exceed our padding budget */ assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); + ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); - ev2.mapped_size = round_up(ev2.unwind_data_size, 16); + ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment + + /* Calculate total event size with padding */ int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size; - int padding_size = round_up(content_size, 8) - content_size; + int padding_size = round_up(content_size, 8) - content_size; // 8-byte align ev2.base.size = content_size + padding_size; - perf_map_jit_write_fully(&ev2, sizeof(ev2)); + /* Write the unwinding info event header */ + perf_map_jit_write_fully(&ev2, sizeof(ev2)); - // Populate the eh Frame header + /* + * Write EH Frame Header + * + * The EH frame header provides metadata about the DWARF unwinding + * information that follows. It includes pointers and counts that + * help perf navigate the unwinding data efficiently. + */ EhFrameHeader f; f.version = 1; - f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; - f.fde_count_enc = DwarfUData4; - f.table_enc = DwarfSData4 | DwarfDataRel; + f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; // PC-relative signed 4-byte + f.fde_count_enc = DwarfUData4; // Unsigned 4-byte count + f.table_enc = DwarfSData4 | DwarfDataRel; // Data-relative signed 4-byte + + /* Calculate relative offsets for EH frame navigation */ f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char)); - f.eh_fde_count = 1; + f.eh_fde_count = 1; // We generate exactly one FDE per function f.from = -(round_up(code_size, 8) + eh_frame_size); + int cie_size = ctx.eh_frame_p - ctx.startp; f.to = -(eh_frame_size - cie_size); + /* Write EH frame data and header */ perf_map_jit_write_fully(ctx.startp, eh_frame_size); perf_map_jit_write_fully(&f, sizeof(f)); + /* Write padding to maintain alignment */ char padding_bytes[] = "\0\0\0\0\0\0\0\0"; perf_map_jit_write_fully(&padding_bytes, padding_size); - // Write the code load event. + /* + * Write Code Load Event + * + * This event tells perf about the new code region. It includes: + * - Memory addresses and sizes + * - Process and thread identification + * - Function name for symbol resolution + * - The actual machine code bytes + */ CodeLoadEvent ev; ev.base.event = PerfLoad; ev.base.size = sizeof(ev) + (name_length+1) + size; ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); - ev.thread_id = syscall(SYS_gettid); - ev.vma = base; - ev.code_address = base; + ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call + ev.vma = base; // Virtual memory address + ev.code_address = base; // Same as VMA for our use case ev.code_size = size; + + /* Assign unique code ID and increment counter */ perf_jit_map_state.code_id += 1; ev.code_id = perf_jit_map_state.code_id; + /* Write code load event and associated data */ perf_map_jit_write_fully(&ev, sizeof(ev)); - perf_map_jit_write_fully(perf_map_entry, name_length+1); - perf_map_jit_write_fully((void*)(base), size); - return; + perf_map_jit_write_fully(perf_map_entry, name_length+1); // Include null terminator + perf_map_jit_write_fully((void*)(base), size); // Copy actual machine code + + /* Clean up allocated memory */ + PyMem_RawFree(perf_map_entry); } +// ============================================================================= +// CLEANUP AND FINALIZATION +// ============================================================================= + +/* + * Finalize and cleanup the perf jitdump system + * + * This function is called when Python is shutting down or when the + * perf trampoline system is being disabled. It ensures all resources + * are properly released and all buffered data is flushed to disk. + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * + * Returns: 0 on success + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static int perf_map_jit_fini(void* state) { + /* + * Close jitdump file with proper synchronization + * + * We need to acquire the lock to ensure no other threads are + * writing to the file when we close it. This prevents corruption + * and ensures all data is properly flushed. + */ if (perf_jit_map_state.perf_map != NULL) { - // close the file PyThread_acquire_lock(perf_jit_map_state.map_lock, 1); - fclose(perf_jit_map_state.perf_map); + fclose(perf_jit_map_state.perf_map); // This also flushes buffers PyThread_release_lock(perf_jit_map_state.map_lock); - // clean up the lock and state + /* Clean up synchronization primitive */ PyThread_free_lock(perf_jit_map_state.map_lock); perf_jit_map_state.perf_map = NULL; } + + /* + * Unmap the memory region + * + * This removes the signal to perf that we were generating JIT code. + * After this point, perf will no longer detect this process as + * having JIT capabilities. + */ if (perf_jit_map_state.mapped_buffer != NULL) { munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); + perf_jit_map_state.mapped_buffer = NULL; } + + /* Clear global state reference */ trampoline_api.state = NULL; - return 0; + + return 0; // Success } +// ============================================================================= +// PUBLIC API EXPORT +// ============================================================================= + +/* + * Python Perf Callbacks Structure + * + * This structure defines the callback interface that Python's trampoline + * system uses to integrate with perf profiling. It contains function + * pointers for initialization, event writing, and cleanup. + * + * CRITICAL: This structure and its contents are part of Python's internal + * API. The function signatures and behavior must remain stable to maintain + * compatibility with the Python interpreter's perf integration system. + * + * Used by: Python's _PyPerf_Callbacks system in pycore_ceval.h + */ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { - &perf_map_jit_init, - &perf_map_jit_write_entry, - &perf_map_jit_fini, + &perf_map_jit_init, // Initialization function + &perf_map_jit_write_entry, // Event writing function + &perf_map_jit_fini, // Cleanup function }; -#endif +#endif /* PY_HAVE_PERF_TRAMPOLINE */ \ No newline at end of file From a32ea456992fedfc9ce61561c88056de3c18cffd Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Sun, 25 May 2025 18:09:32 -0400 Subject: [PATCH 343/989] gh-134152: Fix UnboundLocalError in email._header_value_parser _get_ptext_to_endchars (#134233) Fix an UnboundLocalError that can occur when parsing certain delimited constructs in headers (domain literals, quoted strings, comments). After the fix the _get_ptext_to_endchars returns an empty string if there is no content after the opening delimiter. The calling code is responsible for handling the lack of the trailing delimiter, which it already does; this edge case was the header ending immediately after the opening delimiter. --- Lib/email/_header_value_parser.py | 2 ++ .../test_email/test__header_value_parser.py | 33 +++++++++++++++++++ ...-05-19-10-32-11.gh-issue-134152.INJC2j.rst | 2 ++ 3 files changed, 37 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-19-10-32-11.gh-issue-134152.INJC2j.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9a51b9437333db..f11fa83d45ed2d 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1020,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars): a flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index ac12c3b2306f7d..fd4ac2c404ce47 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -463,6 +463,19 @@ def test_get_qp_ctext_non_printables(self): [errors.NonPrintableDefect], ')') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + def test_get_qp_ctext_close_paren_only(self): + self._test_get_x(parser.get_qp_ctext, + ')', '', ' ', [], ')') + + def test_get_qp_ctext_open_paren_only(self): + self._test_get_x(parser.get_qp_ctext, + '(', '', ' ', [], '(') + + def test_get_qp_ctext_no_end_char(self): + self._test_get_x(parser.get_qp_ctext, + '', '', ' ', [], '') + + # get_qcontent def test_get_qcontent_only(self): @@ -503,6 +516,14 @@ def test_get_qcontent_non_printables(self): [errors.NonPrintableDefect], '"') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + def test_get_qcontent_empty(self): + self._test_get_x(parser.get_qcontent, + '"', '', '', [], '"') + + def test_get_qcontent_no_end_char(self): + self._test_get_x(parser.get_qcontent, + '', '', '', [], '') + # get_atext def test_get_atext_only(self): @@ -1283,6 +1304,18 @@ def test_get_dtext_open_bracket_mid_word(self): self._test_get_x(parser.get_dtext, 'foo[bar', 'foo', 'foo', [], '[bar') + def test_get_dtext_open_bracket_only(self): + self._test_get_x(parser.get_dtext, + '[', '', '', [], '[') + + def test_get_dtext_close_bracket_only(self): + self._test_get_x(parser.get_dtext, + ']', '', '', [], ']') + + def test_get_dtext_empty(self): + self._test_get_x(parser.get_dtext, + '', '', '', [], '') + # get_domain_literal def test_get_domain_literal_only(self): diff --git a/Misc/NEWS.d/next/Library/2025-05-19-10-32-11.gh-issue-134152.INJC2j.rst b/Misc/NEWS.d/next/Library/2025-05-19-10-32-11.gh-issue-134152.INJC2j.rst new file mode 100644 index 00000000000000..6da3d4147dd960 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-19-10-32-11.gh-issue-134152.INJC2j.rst @@ -0,0 +1,2 @@ +Fixed :exc:`UnboundLocalError` that could occur during :mod:`email` header +parsing if an expected trailing delimiter is missing in some contexts. From 0e3bc962c6462f836751e35ef35fa837fd952550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Simon?= Date: Mon, 26 May 2025 01:05:08 +0200 Subject: [PATCH 344/989] gh-69605: Disable PyREPL module autocomplete fallback on regular completion (gh-134181) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Loïc Simon --- Lib/_pyrepl/_module_completer.py | 4 +-- Lib/_pyrepl/readline.py | 5 ++-- Lib/test/test_pyrepl/test_pyrepl.py | 28 +++++++++++++------ ...5-05-18-14-33-23.gh-issue-69605.ZMO49F.rst | 2 ++ 4 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-18-14-33-23.gh-issue-69605.ZMO49F.rst diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 494a501101a9b2..1e9462a42156d4 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -42,11 +42,11 @@ def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: self._global_cache: list[pkgutil.ModuleInfo] = [] self._curr_sys_path: list[str] = sys.path[:] - def get_completions(self, line: str) -> list[str]: + def get_completions(self, line: str) -> list[str] | None: """Return the next possible import completions for 'line'.""" result = ImportParser(line).parse() if not result: - return [] + return None try: return self.complete(*result) except Exception: diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 572eee520e53f3..9560ae779abfea 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -134,7 +134,8 @@ def get_stem(self) -> str: return "".join(b[p + 1 : self.pos]) def get_completions(self, stem: str) -> list[str]: - if module_completions := self.get_module_completions(): + module_completions = self.get_module_completions() + if module_completions is not None: return module_completions if len(stem) == 0 and self.more_lines is not None: b = self.buffer @@ -165,7 +166,7 @@ def get_completions(self, stem: str) -> list[str]: result.sort() return result - def get_module_completions(self) -> list[str]: + def get_module_completions(self) -> list[str] | None: line = self.get_line() return self.config.module_completer.get_completions(line) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index abb4bd1bc25fb1..aa3a592766d6d1 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -918,7 +918,14 @@ def test_func(self): class TestPyReplModuleCompleter(TestCase): def setUp(self): + import importlib + # Make iter_modules() search only the standard library. + # This makes the test more reliable in case there are + # other user packages/scripts on PYTHONPATH which can + # interfere with the completions. + lib_path = os.path.dirname(importlib.__path__[0]) self._saved_sys_path = sys.path + sys.path = [lib_path] def tearDown(self): sys.path = self._saved_sys_path @@ -932,14 +939,6 @@ def prepare_reader(self, events, namespace): return reader def test_import_completions(self): - import importlib - # Make iter_modules() search only the standard library. - # This makes the test more reliable in case there are - # other user packages/scripts on PYTHONPATH which can - # intefere with the completions. - lib_path = os.path.dirname(importlib.__path__[0]) - sys.path = [lib_path] - cases = ( ("import path\t\n", "import pathlib"), ("import importlib.\t\tres\t\n", "import importlib.resources"), @@ -1052,6 +1051,19 @@ def test_invalid_identifiers(self): output = reader.readline() self.assertEqual(output, expected) + def test_no_fallback_on_regular_completion(self): + cases = ( + ("import pri\t\n", "import pri"), + ("from pri\t\n", "from pri"), + ("from typing import Na\t\n", "from typing import Na"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + def test_get_path_and_prefix(self): cases = ( ('', ('', '')), diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-18-14-33-23.gh-issue-69605.ZMO49F.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-18-14-33-23.gh-issue-69605.ZMO49F.rst new file mode 100644 index 00000000000000..7b7275fee69b9b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-18-14-33-23.gh-issue-69605.ZMO49F.rst @@ -0,0 +1,2 @@ +When auto-completing an import in the :term:`REPL`, finding no candidates +now issues no suggestion, rather than suggestions from the current namespace. From cf8941c60356acdd00055e5583a2d64761c34af4 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 26 May 2025 05:44:33 +0300 Subject: [PATCH 345/989] gh-132876: workaround broken ldexp() on Windows 10 (#133135) * gh-132876: workaround broken ldexp() on Windows 10 ldexp() fails to round subnormal results before Windows 11, so hide their bug. Co-authored-by: Tim Peters --- Lib/test/test_math.py | 6 ++++++ ...-04-29-11-48-46.gh-issue-132876.lyTQGZ.rst | 4 ++++ Modules/mathmodule.c | 21 +++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-29-11-48-46.gh-issue-132876.lyTQGZ.rst diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 913a60bf9e04e3..d14336f8bac498 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1214,6 +1214,12 @@ def testLdexp(self): self.assertEqual(math.ldexp(NINF, n), NINF) self.assertTrue(math.isnan(math.ldexp(NAN, n))) + @requires_IEEE_754 + def testLdexp_denormal(self): + # Denormal output incorrectly rounded (truncated) + # on some Windows. + self.assertEqual(math.ldexp(6993274598585239, -1126), 1e-323) + def testLog(self): self.assertRaises(TypeError, math.log) self.assertRaises(TypeError, math.log, 1, 2, 3) diff --git a/Misc/NEWS.d/next/Library/2025-04-29-11-48-46.gh-issue-132876.lyTQGZ.rst b/Misc/NEWS.d/next/Library/2025-04-29-11-48-46.gh-issue-132876.lyTQGZ.rst new file mode 100644 index 00000000000000..cb3ca3321e3d26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-29-11-48-46.gh-issue-132876.lyTQGZ.rst @@ -0,0 +1,4 @@ +``ldexp()`` on Windows doesn't round subnormal results before Windows 11, +but should. Python's :func:`math.ldexp` wrapper now does round them, so +results may change slightly, in rare cases of very small results, on +Windows versions before 11. diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 40abd69f0a6600..71d9c1387f5780 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2161,6 +2161,27 @@ math_ldexp_impl(PyObject *module, double x, PyObject *i) } else { errno = 0; r = ldexp(x, (int)exp); +#ifdef _MSC_VER + if (DBL_MIN > r && r > -DBL_MIN) { + /* Denormal (or zero) results can be incorrectly rounded here (rather, + truncated). Fixed in newer versions of the C runtime, included + with Windows 11. */ + int original_exp; + frexp(x, &original_exp); + if (original_exp > DBL_MIN_EXP) { + /* Shift down to the smallest normal binade. No bits lost. */ + int shift = DBL_MIN_EXP - original_exp; + x = ldexp(x, shift); + exp -= shift; + } + /* Multiplying by 2**exp finishes the job, and the HW will round as + appropriate. Note: if exp < -DBL_MANT_DIG, all of x is shifted + to be < 0.5ULP of smallest denorm, so should be thrown away. If + exp is so very negative that ldexp underflows to 0, that's fine; + no need to check in advance. */ + r = x*ldexp(1.0, (int)exp); + } +#endif if (isinf(r)) errno = ERANGE; } From fb09db1b934a51e52e46dc64d7e5e84fe69e6160 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 26 May 2025 10:51:12 +0200 Subject: [PATCH 346/989] gh-129594: Remove redundant check on varargs in `_PyArg_CheckPositional` (#129595) --- Include/internal/pycore_modsupport.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/internal/pycore_modsupport.h b/Include/internal/pycore_modsupport.h index 614e9f93751834..d90f42e9cd8b07 100644 --- a/Include/internal/pycore_modsupport.h +++ b/Include/internal/pycore_modsupport.h @@ -27,9 +27,8 @@ PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kwargs); // Export for 'zlib' shared extension PyAPI_FUNC(int) _PyArg_CheckPositional(const char *, Py_ssize_t, Py_ssize_t, Py_ssize_t); -#define _Py_ANY_VARARGS(n) ((n) == PY_SSIZE_T_MAX) #define _PyArg_CheckPositional(funcname, nargs, min, max) \ - ((!_Py_ANY_VARARGS(max) && (min) <= (nargs) && (nargs) <= (max)) \ + (((min) <= (nargs) && (nargs) <= (max)) \ || _PyArg_CheckPositional((funcname), (nargs), (min), (max))) extern PyObject ** _Py_VaBuildStack( From 29e81159644cf78d958e30aaef208e22a04a8b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 10:52:19 +0200 Subject: [PATCH 347/989] gh-134208: remove dead AC directives for `_curses.window.{chgat,getstr,instr}` (#134325) --- Modules/_cursesmodule.c | 321 ++++++++++++++++++---------------------- 1 file changed, 148 insertions(+), 173 deletions(-) diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 5e1eccee3e4a89..55d664cebe31ec 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1429,32 +1429,27 @@ int py_mvwdelch(WINDOW *w, int y, int x) /* chgat, added by Fabian Kreutz */ #ifdef HAVE_CURSES_WCHGAT -/*[-clinic input] -_curses.window.chgat - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - - n: int = -1 - Number of characters. - - attr: long - Attributes for characters. - / - -Set the attributes of characters. +PyDoc_STRVAR(_curses_window_chgat__doc__, +"chgat([y, x,] [n=-1,] attr)\n" +"Set the attributes of characters.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Number of characters.\n" +" attr\n" +" Attributes for characters.\n" +"\n" +"Set the attributes of num characters at the current cursor position, or at\n" +"position (y, x) if supplied. If no value of num is given or num = -1, the\n" +"attribute will be set on all the characters to the end of the line. This\n" +"function does not move the cursor. The changed line will be touched using\n" +"the touchline() method so that the contents will be redisplayed by the next\n" +"window refresh."); -Set the attributes of num characters at the current cursor position, or at -position (y, x) if supplied. If no value of num is given or num = -1, the -attribute will be set on all the characters to the end of the line. This -function does not move the cursor. The changed line will be touched using -the touchline() method so that the contents will be redisplayed by the next -window refresh. -[-clinic start generated code]*/ static PyObject * PyCursesWindow_ChgAt(PyObject *op, PyObject *args) { @@ -1481,19 +1476,20 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) attr = lattr; break; case 3: - if (!PyArg_ParseTuple(args,"iil;int,int,attr", &y, &x, &lattr)) + if (!PyArg_ParseTuple(args,"iil;y,x,attr", &y, &x, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; case 4: - if (!PyArg_ParseTuple(args,"iiil;int,int,n,attr", &y, &x, &num, &lattr)) + if (!PyArg_ParseTuple(args,"iiil;y,x,n,attr", &y, &x, &num, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; default: - PyErr_SetString(PyExc_TypeError, "chgat requires 1 to 4 arguments"); + PyErr_SetString(PyExc_TypeError, + "_curses.window.chgat requires 1 to 4 arguments"); return NULL; } @@ -1807,102 +1803,102 @@ _curses_window_get_wch_impl(PyCursesWindowObject *self, int group_right_1, } #endif -/*[-clinic input] -_curses.window.getstr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 2047 - Maximal number of characters. - / +/* + * Helper function for parsing parameters from getstr() and instr(). + * This function is necessary because Argument Clinic does not know + * how to handle nested optional groups with default values inside. + * + * Return 1 on success and 0 on failure, similar to PyArg_ParseTuple(). + */ +static int +curses_clinic_parse_optional_xy_n(PyObject *args, + int *y, int *x, unsigned int *n, int *use_xy, + const char *qualname) +{ + switch (PyTuple_GET_SIZE(args)) { + case 0: { + *use_xy = 0; + return 1; + } + case 1: { + *use_xy = 0; + return PyArg_ParseTuple(args, "O&;n", + _PyLong_UnsignedInt_Converter, n); + } + case 2: { + *use_xy = 1; + return PyArg_ParseTuple(args, "ii;y,x", y, x); + } + case 3: { + *use_xy = 1; + return PyArg_ParseTuple(args, "iiO&;y,x,n", y, x, + _PyLong_UnsignedInt_Converter, n); + } + default: { + *use_xy = 0; + PyErr_Format(PyExc_TypeError, "%s requires 0 to 3 arguments", + qualname); + return 0; + } + } +} -Read a string from the user, with primitive line editing capacity. -[-clinic start generated code]*/ +PyDoc_STRVAR(_curses_window_getstr__doc__, +"getstr([[y, x,] n=2047])\n" +"Read a string from the user, with primitive line editing capacity.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters."); static PyObject * -PyCursesWindow_GetStr(PyObject *op, PyObject *args) +PyCursesWindow_getstr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - int rtn; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - /* could make the buffer size larger/dynamic */ - Py_ssize_t max_buf_size = 2048; - PyObject *result = PyBytes_FromStringAndSize(NULL, max_buf_size); - if (result == NULL) + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { return NULL; - char *buf = PyBytes_AS_STRING(result); + } + char *buf = PyBytes_AS_STRING(res); - switch (PyTuple_Size(args)) { - case 0: - Py_BEGIN_ALLOW_THREADS - rtn = wgetnstr(self->win, buf, max_buf_size - 1); - Py_END_ALLOW_THREADS - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - goto error; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - goto error; - } - Py_BEGIN_ALLOW_THREADS - rtn = wgetnstr(self->win, buf, Py_MIN(n, max_buf_size - 1)); - Py_END_ALLOW_THREADS - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - goto error; + if (use_xy) { Py_BEGIN_ALLOW_THREADS #ifdef STRICT_SYSV_CURSES - rtn = wmove(self->win,y,x)==ERR ? ERR : wgetnstr(self->win, rtn, max_buf_size - 1); + rtn = wmove(self->win, y, x) == ERR + ? ERR + : wgetnstr(self->win, buf, n); #else - rtn = mvwgetnstr(self->win,y,x,buf, max_buf_size - 1); + rtn = mvwgetnstr(self->win, y, x, buf, n); #endif Py_END_ALLOW_THREADS - break; - case 3: - if (!PyArg_ParseTuple(args,"iii;y,x,n", &y, &x, &n)) - goto error; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - goto error; - } -#ifdef STRICT_SYSV_CURSES - Py_BEGIN_ALLOW_THREADS - rtn = wmove(self->win,y,x)==ERR ? ERR : - wgetnstr(self->win, rtn, Py_MIN(n, max_buf_size - 1)); - Py_END_ALLOW_THREADS -#else + } + else { Py_BEGIN_ALLOW_THREADS - rtn = mvwgetnstr(self->win, y, x, buf, Py_MIN(n, max_buf_size - 1)); + rtn = wgetnstr(self->win, buf, n); Py_END_ALLOW_THREADS -#endif - break; - default: - PyErr_SetString(PyExc_TypeError, "getstr requires 0 to 3 arguments"); - goto error; } if (rtn == ERR) { - Py_DECREF(result); + Py_DECREF(res); return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } - - if (_PyBytes_Resize(&result, strlen(buf)) < 0) { - return NULL; - } - - return result; - -error: - Py_DECREF(result); - return NULL; + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -2032,87 +2028,57 @@ _curses_window_inch_impl(PyCursesWindowObject *self, int group_right_1, return rtn; } -/*[-clinic input] -_curses.window.instr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 2047 - Maximal number of characters. - / - -Return a string of characters, extracted from the window. +PyDoc_STRVAR(_curses_window_instr__doc__, +"instr([y, x,] n=2047)\n" +"Return a string of characters, extracted from the window.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters.\n" +"\n" +"Return a string of characters, extracted from the window starting at the\n" +"current cursor position, or at y, x if specified. Attributes are stripped\n" +"from the characters. If n is specified, instr() returns a string at most\n" +"n characters long (exclusive of the trailing NUL)."); -Return a string of characters, extracted from the window starting at the -current cursor position, or at y, x if specified. Attributes are stripped -from the characters. If n is specified, instr() returns a string at most -n characters long (exclusive of the trailing NUL). -[-clinic start generated code]*/ static PyObject * -PyCursesWindow_InStr(PyObject *op, PyObject *args) +PyCursesWindow_instr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - int rtn; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - /* could make the buffer size larger/dynamic */ - Py_ssize_t max_buf_size = 2048; - PyObject *result = PyBytes_FromStringAndSize(NULL, max_buf_size); - if (result == NULL) + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { return NULL; - char *buf = PyBytes_AS_STRING(result); + } + char *buf = PyBytes_AS_STRING(res); - switch (PyTuple_Size(args)) { - case 0: - rtn = winnstr(self->win, buf, max_buf_size - 1); - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - goto error; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - goto error; - } - rtn = winnstr(self->win, buf, Py_MIN(n, max_buf_size - 1)); - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - goto error; - rtn = mvwinnstr(self->win, y, x, buf, max_buf_size - 1); - break; - case 3: - if (!PyArg_ParseTuple(args, "iii;y,x,n", &y, &x, &n)) - goto error; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - goto error; - } - rtn = mvwinnstr(self->win, y, x, buf, Py_MIN(n, max_buf_size - 1)); - break; - default: - PyErr_SetString(PyExc_TypeError, "instr requires 0 or 3 arguments"); - goto error; + if (use_xy) { + rtn = mvwinnstr(self->win, y, x, buf, n); + } + else { + rtn = winnstr(self->win, buf, n); } if (rtn == ERR) { - Py_DECREF(result); + Py_DECREF(res); return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } - - if (_PyBytes_Resize(&result, strlen(buf)) < 0) { - return NULL; - } - - return result; - -error: - Py_DECREF(result); - return NULL; + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -2854,7 +2820,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_ATTRSET_METHODDEF _CURSES_WINDOW_BKGD_METHODDEF #ifdef HAVE_CURSES_WCHGAT - {"chgat", PyCursesWindow_ChgAt, METH_VARARGS}, + { + "chgat", PyCursesWindow_ChgAt, METH_VARARGS, + _curses_window_chgat__doc__ + }, #endif _CURSES_WINDOW_BKGDSET_METHODDEF _CURSES_WINDOW_BORDER_METHODDEF @@ -2877,7 +2846,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_GET_WCH_METHODDEF {"getmaxyx", PyCursesWindow_getmaxyx, METH_NOARGS}, {"getparyx", PyCursesWindow_getparyx, METH_NOARGS}, - {"getstr", PyCursesWindow_GetStr, METH_VARARGS}, + { + "getstr", PyCursesWindow_getstr, METH_VARARGS, + _curses_window_getstr__doc__ + }, {"getyx", PyCursesWindow_getyx, METH_NOARGS}, _CURSES_WINDOW_HLINE_METHODDEF {"idcok", PyCursesWindow_idcok, METH_VARARGS}, @@ -2891,7 +2863,10 @@ static PyMethodDef PyCursesWindow_methods[] = { {"insertln", PyCursesWindow_winsertln, METH_NOARGS}, _CURSES_WINDOW_INSNSTR_METHODDEF _CURSES_WINDOW_INSSTR_METHODDEF - {"instr", PyCursesWindow_InStr, METH_VARARGS}, + { + "instr", PyCursesWindow_instr, METH_VARARGS, + _curses_window_instr__doc__ + }, _CURSES_WINDOW_IS_LINETOUCHED_METHODDEF {"is_wintouched", PyCursesWindow_is_wintouched, METH_NOARGS}, {"keypad", PyCursesWindow_keypad, METH_VARARGS}, From 3bffada46728e51f84c41ecbb0d3722595693e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 10:56:31 +0200 Subject: [PATCH 348/989] gh-132710: only use stable `_uuid.generate_time_safe()` to deduce MAC address (#132901) --- Lib/test/test_uuid.py | 28 ++++++ Lib/uuid.py | 6 +- Modules/_uuidmodule.c | 40 ++++++-- configure | 209 ++++++++++++++++++++++++++++++++++++++++-- configure.ac | 99 ++++++++++++++++++-- pyconfig.h.in | 3 + 6 files changed, 357 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 958be5408ce90a..7ddacf07a2c789 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -14,6 +14,7 @@ from test import support from test.support import import_helper +from test.support.script_helper import assert_python_ok py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) @@ -1217,10 +1218,37 @@ def test_cli_uuid5_ouputted_with_valid_namespace_and_name(self): class TestUUIDWithoutExtModule(BaseTestUUID, unittest.TestCase): uuid = py_uuid + @unittest.skipUnless(c_uuid, 'requires the C _uuid module') class TestUUIDWithExtModule(BaseTestUUID, unittest.TestCase): uuid = c_uuid + def check_has_stable_libuuid_extractable_node(self): + if not self.uuid._has_stable_extractable_node: + self.skipTest("libuuid cannot deduce MAC address") + + @unittest.skipUnless(os.name == 'posix', 'POSIX only') + def test_unix_getnode_from_libuuid(self): + self.check_has_stable_libuuid_extractable_node() + script = 'import uuid; print(uuid._unix_getnode())' + _, n_a, _ = assert_python_ok('-c', script) + _, n_b, _ = assert_python_ok('-c', script) + n_a, n_b = n_a.decode().strip(), n_b.decode().strip() + self.assertTrue(n_a.isdigit()) + self.assertTrue(n_b.isdigit()) + self.assertEqual(n_a, n_b) + + @unittest.skipUnless(os.name == 'nt', 'Windows only') + def test_windows_getnode_from_libuuid(self): + self.check_has_stable_libuuid_extractable_node() + script = 'import uuid; print(uuid._windll_getnode())' + _, n_a, _ = assert_python_ok('-c', script) + _, n_b, _ = assert_python_ok('-c', script) + n_a, n_b = n_a.decode().strip(), n_b.decode().strip() + self.assertTrue(n_a.isdigit()) + self.assertTrue(n_b.isdigit()) + self.assertEqual(n_a, n_b) + class BaseTestInternals: _uuid = py_uuid diff --git a/Lib/uuid.py b/Lib/uuid.py index 036ffebf67a0be..06f81a7c338372 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -633,22 +633,24 @@ def _netstat_getnode(): try: import _uuid _generate_time_safe = getattr(_uuid, "generate_time_safe", None) + _has_stable_extractable_node = _uuid.has_stable_extractable_node _UuidCreate = getattr(_uuid, "UuidCreate", None) except ImportError: _uuid = None _generate_time_safe = None + _has_stable_extractable_node = False _UuidCreate = None def _unix_getnode(): """Get the hardware address on Unix using the _uuid extension module.""" - if _generate_time_safe: + if _generate_time_safe and _has_stable_extractable_node: uuid_time, _ = _generate_time_safe() return UUID(bytes=uuid_time).node def _windll_getnode(): """Get the hardware address on Windows using the _uuid extension module.""" - if _UuidCreate: + if _UuidCreate and _has_stable_extractable_node: uuid_bytes = _UuidCreate() return UUID(bytes_le=uuid_bytes).node diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index c5e78b1510b5e3..c31a7e8fea5608 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -78,23 +78,47 @@ py_UuidCreate(PyObject *Py_UNUSED(context), return NULL; } +static int +py_windows_has_stable_node(void) +{ + UUID uuid; + RPC_STATUS res; + Py_BEGIN_ALLOW_THREADS + res = UuidCreateSequential(&uuid); + Py_END_ALLOW_THREADS + return res == RPC_S_OK; +} #endif /* MS_WINDOWS */ static int -uuid_exec(PyObject *module) { +uuid_exec(PyObject *module) +{ +#define ADD_INT(NAME, VALUE) \ + do { \ + if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ + return -1; \ + } \ + } while (0) + assert(sizeof(uuid_t) == 16); #if defined(MS_WINDOWS) - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #elif defined(HAVE_UUID_GENERATE_TIME_SAFE) - int has_uuid_generate_time_safe = 1; + ADD_INT("has_uuid_generate_time_safe", 1); #else - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #endif - if (PyModule_AddIntConstant(module, "has_uuid_generate_time_safe", - has_uuid_generate_time_safe) < 0) { - return -1; - } + +#if defined(MS_WINDOWS) + ADD_INT("has_stable_extractable_node", py_windows_has_stable_node()); +#elif defined(HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC) + ADD_INT("has_stable_extractable_node", 1); +#else + ADD_INT("has_stable_extractable_node", 0); +#endif + +#undef ADD_INT return 0; } diff --git a/configure b/configure index abdd28fcabf769..c9ce9b4a73527d 100755 --- a/configure +++ b/configure @@ -14052,6 +14052,7 @@ fi + have_uuid=missing for ac_header in uuid.h @@ -14061,6 +14062,7 @@ if test "x$ac_cv_header_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h + for ac_func in uuid_create uuid_enc_be do : as_ac_var=`printf "%s\n" "ac_cv_func_$ac_func" | sed "$as_sed_sh"` @@ -14070,7 +14072,9 @@ then : cat >>confdefs.h <<_ACEOF #define `printf "%s\n" "HAVE_$ac_func" | sed "$as_sed_cpp"` 1 _ACEOF - have_uuid=yes + + have_uuid=yes + ac_cv_have_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} @@ -14160,6 +14164,7 @@ if test "x$ac_cv_header_uuid_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + ac_cv_have_uuid_uuid_h=yes py_check_lib_save_LIBS=$LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for uuid_generate_time in -luuid" >&5 printf %s "checking for uuid_generate_time in -luuid... " >&6; } @@ -14257,8 +14262,9 @@ fi printf "%s\n" "$ac_cv_lib_uuid_uuid_generate_time_safe" >&6; } if test "x$ac_cv_lib_uuid_uuid_generate_time_safe" = xyes then : - have_uuid=yes - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes fi @@ -14302,6 +14308,7 @@ if test "x$ac_cv_header_uuid_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + ac_cv_have_uuid_uuid_h=yes py_check_lib_save_LIBS=$LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for uuid_generate_time in -luuid" >&5 printf %s "checking for uuid_generate_time in -luuid... " >&6; } @@ -14399,8 +14406,9 @@ fi printf "%s\n" "$ac_cv_lib_uuid_uuid_generate_time_safe" >&6; } if test "x$ac_cv_lib_uuid_uuid_generate_time_safe" = xyes then : - have_uuid=yes - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes fi @@ -14431,10 +14439,16 @@ else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_uuid=yes - printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h - - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h - + ac_cv_have_uuid_generate_time_safe=yes + # The uuid.h file to include may be *or* . + # Since pkg-config --cflags uuid may return -I/usr/include/uuid, + # it's possible to write '#include ' in _uuidmodule.c, + # assuming that the compiler flags are properly updated. + # + # Ideally, we should have defined HAVE_UUID_H if and only if + # #include can be written, *without* assuming extra + # include path. + ac_cv_have_uuid_h=yes fi @@ -14455,6 +14469,7 @@ if test "x$ac_cv_func_uuid_generate_time" = xyes then : have_uuid=yes + ac_cv_have_uuid_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} @@ -14465,6 +14480,24 @@ fi done +fi + +if test "x$ac_cv_have_uuid_h" = xyes +then : + printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h + +fi +if test "x$ac_cv_have_uuid_uuid_h" = xyes +then : + printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + +fi +if test "x$ac_cv_have_uuid_generate_time_safe" = xyes +then : + + printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + fi # gh-124228: While the libuuid library is available on NetBSD, it supports only UUID version 4. @@ -14480,6 +14513,164 @@ then : have_uuid=no fi +# gh-132710: The UUID node is fetched by using libuuid when possible +# and cached. While the node is constant within the same process, +# different interpreters may have different values as libuuid may +# randomize the node value if the latter cannot be deduced. +# +# Consumers may define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC +# to indicate that libuuid is unstable and should not be relied +# upon to deduce the MAC address. + + +if test "$have_uuid" = "yes" -a "$HAVE_UUID_GENERATE_TIME_SAFE" = "1" +then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if uuid_generate_time_safe() node value is stable" >&5 +printf %s "checking if uuid_generate_time_safe() node value is stable... " >&6; } + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + if test "$cross_compiling" = yes +then : + + +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include // PRIu64 + #include // uint64_t + #include // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include + #else + #include + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + } +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + + py_cv_uuid_node1=`cat conftest.out` + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + if test "$cross_compiling" = yes +then : + + +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include // PRIu64 + #include // uint64_t + #include // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include + #else + #include + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + } +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + + py_cv_uuid_node2=`cat conftest.out` + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + if test -n "$py_cv_uuid_node1" -a "$py_cv_uuid_node1" = "$py_cv_uuid_node2" + then + printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: stable" >&5 +printf "%s\n" "stable" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unstable" >&5 +printf "%s\n" "unstable" >&6; } + fi +fi + # 'Real Time' functions on Solaris # posix4 on Solaris 2.6 # pthread (first!) on Linux diff --git a/configure.ac b/configure.ac index 8d939f075058bf..eb52365d95be21 100644 --- a/configure.ac +++ b/configure.ac @@ -3740,15 +3740,17 @@ dnl check for uuid dependencies AH_TEMPLATE([HAVE_UUID_H], [Define to 1 if you have the header file.]) AH_TEMPLATE([HAVE_UUID_UUID_H], [Define to 1 if you have the header file.]) AH_TEMPLATE([HAVE_UUID_GENERATE_TIME_SAFE], [Define if uuid_generate_time_safe() exists.]) +AH_TEMPLATE([HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC], [Define if uuid_generate_time_safe() is able to deduce a MAC address.]) have_uuid=missing dnl AIX provides support for RFC4122 (uuid) in libc.a starting with AIX 6.1 dnl (anno 2007). FreeBSD and OpenBSD provides support in libc as well. dnl Little-endian FreeBSD, OpenBSD and NetBSD needs encoding into an octet dnl stream in big-endian byte-order -AC_CHECK_HEADERS([uuid.h], - [AC_CHECK_FUNCS([uuid_create uuid_enc_be], - [have_uuid=yes +AC_CHECK_HEADERS([uuid.h], [ + AC_CHECK_FUNCS([uuid_create uuid_enc_be], [ + have_uuid=yes + ac_cv_have_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} ]) @@ -3758,19 +3760,29 @@ AS_VAR_IF([have_uuid], [missing], [ PKG_CHECK_MODULES( [LIBUUID], [uuid >= 2.20], [dnl linux-util's libuuid has uuid_generate_time_safe() since v2.20 (2011) - dnl and provides . + dnl and provides assuming specific include paths are given have_uuid=yes - AC_DEFINE([HAVE_UUID_H], [1]) - AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) + ac_cv_have_uuid_generate_time_safe=yes + # The uuid.h file to include may be *or* . + # Since pkg-config --cflags uuid may return -I/usr/include/uuid, + # it's possible to write '#include ' in _uuidmodule.c, + # assuming that the compiler flags are properly updated. + # + # Ideally, we should have defined HAVE_UUID_H if and only if + # #include can be written, *without* assuming extra + # include path. + ac_cv_have_uuid_h=yes ], [ WITH_SAVE_ENV([ CPPFLAGS="$CPPFLAGS $LIBUUID_CFLAGS" LIBS="$LIBS $LIBUUID_LIBS" AC_CHECK_HEADERS([uuid/uuid.h], [ + ac_cv_have_uuid_uuid_h=yes PY_CHECK_LIB([uuid], [uuid_generate_time], [have_uuid=yes]) - PY_CHECK_LIB([uuid], [uuid_generate_time_safe], - [have_uuid=yes - AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) ]) ]) + PY_CHECK_LIB([uuid], [uuid_generate_time_safe], [ + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes + ])]) AS_VAR_IF([have_uuid], [yes], [ LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-"-luuid"} @@ -3785,12 +3797,19 @@ AS_VAR_IF([have_uuid], [missing], [ AC_CHECK_HEADERS([uuid/uuid.h], [ AC_CHECK_FUNC([uuid_generate_time], [ have_uuid=yes + ac_cv_have_uuid_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} ]) ]) ]) +AS_VAR_IF([ac_cv_have_uuid_h], [yes], [AC_DEFINE([HAVE_UUID_H], [1])]) +AS_VAR_IF([ac_cv_have_uuid_uuid_h], [yes], [AC_DEFINE([HAVE_UUID_UUID_H], [1])]) +AS_VAR_IF([ac_cv_have_uuid_generate_time_safe], [yes], [ + AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) +]) + # gh-124228: While the libuuid library is available on NetBSD, it supports only UUID version 4. # This restriction inhibits the proper generation of time-based UUIDs. if test "$ac_sys_system" = "NetBSD"; then @@ -3800,6 +3819,68 @@ fi AS_VAR_IF([have_uuid], [missing], [have_uuid=no]) +# gh-132710: The UUID node is fetched by using libuuid when possible +# and cached. While the node is constant within the same process, +# different interpreters may have different values as libuuid may +# randomize the node value if the latter cannot be deduced. +# +# Consumers may define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC +# to indicate that libuuid is unstable and should not be relied +# upon to deduce the MAC address. +AC_DEFUN([PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC], [WITH_SAVE_ENV([ + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + AC_RUN_IFELSE([AC_LANG_SOURCE([[ + #include // PRIu64 + #include // uint64_t + #include // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include + #else + #include + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + }]])], [ + AS_VAR_SET([$1], [`cat conftest.out`]) + ], [], [] + )])]) + +if test "$have_uuid" = "yes" -a "$HAVE_UUID_GENERATE_TIME_SAFE" = "1" +then + AC_MSG_CHECKING([if uuid_generate_time_safe() node value is stable]) + PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC([py_cv_uuid_node1]) + PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC([py_cv_uuid_node2]) + if test -n "$py_cv_uuid_node1" -a "$py_cv_uuid_node1" = "$py_cv_uuid_node2" + then + AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC], [1]) + AC_MSG_RESULT([stable]) + else + AC_MSG_RESULT([unstable]) + fi +fi + # 'Real Time' functions on Solaris # posix4 on Solaris 2.6 # pthread (first!) on Linux diff --git a/pyconfig.h.in b/pyconfig.h.in index c91facbedf94e5..3dbbda157df70e 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1584,6 +1584,9 @@ /* Define if uuid_generate_time_safe() exists. */ #undef HAVE_UUID_GENERATE_TIME_SAFE +/* Define if uuid_generate_time_safe() is able to deduce a MAC address. */ +#undef HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC + /* Define to 1 if you have the header file. */ #undef HAVE_UUID_H From 71290a6fbe79cef3773a3af6fcf08d8bf15c8460 Mon Sep 17 00:00:00 2001 From: tmlnv <108088921+tmlnv@users.noreply.github.com> Date: Mon, 26 May 2025 12:39:04 +0300 Subject: [PATCH 349/989] gh-134559: Add versionadded for ``object.__replace__()`` (#134672) --- Doc/library/copy.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/copy.rst b/Doc/library/copy.rst index 95b41f988a035b..210ad7188003e6 100644 --- a/Doc/library/copy.rst +++ b/Doc/library/copy.rst @@ -122,6 +122,8 @@ and only supports named tuples created by :func:`~collections.namedtuple`, This method should create a new object of the same type, replacing fields with values from *changes*. + .. versionadded:: 3.13 + .. seealso:: From cb8045e86c4fadfd847d614193f2b38ec03933b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 12:12:32 +0200 Subject: [PATCH 350/989] gh-134531: cleanup `_hashopenssl.c` to support `EVP_MAC` (#134626) Rename components related to `_hashlib.{HASH,HASHXOF}` objects. - The `EVPobject` structure is renamed `HASHobject`. - Non-clinic `HASH` methods are now prefixed by `_hashlib_HASH_*`. A similar change is made for non-clinic `HASHXOF` methods. - Functions extracting information from `EVP_MD` objects and functions constructing `EVP_MD` objects now include `openssl_evp_md` in their name. This change allows us to avoid future ambiguities between the `EVP_MD` and the `EVP_MAC` APIs (currently, we only use `EVP_MD` for hash functions and rely on the legacy interface for HMAC instead of using `EVP_MAC`). --- ...-05-26-11-01-54.gh-issue-134531.my1Fzt.rst | 2 + Modules/_hashopenssl.c | 347 +++++++++--------- Modules/clinic/_hashopenssl.c.h | 116 +++--- 3 files changed, 248 insertions(+), 217 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-26-11-01-54.gh-issue-134531.my1Fzt.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-26-11-01-54.gh-issue-134531.my1Fzt.rst b/Misc/NEWS.d/next/Library/2025-05-26-11-01-54.gh-issue-134531.my1Fzt.rst new file mode 100644 index 00000000000000..ee5690df5c4193 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-26-11-01-54.gh-issue-134531.my1Fzt.rst @@ -0,0 +1,2 @@ +:mod:`!_hashlib`: Rename internal C functions for :class:`!_hashlib.HASH` +and :class:`!_hashlib.HASHXOF` objects. Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 48eed5eac975ed..dab0bb9b67fa00 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -38,6 +38,10 @@ #include +#if OPENSSL_VERSION_NUMBER >= 0x30000000L +# define Py_HAS_OPENSSL3_SUPPORT +#endif + #ifndef OPENSSL_THREADS # error "OPENSSL_THREADS is not defined, Python requires thread-safe OpenSSL" #endif @@ -55,7 +59,7 @@ #define PY_OPENSSL_HAS_BLAKE2 1 #endif -#if OPENSSL_VERSION_NUMBER >= 0x30000000L +#ifdef Py_HAS_OPENSSL3_SUPPORT #define PY_EVP_MD EVP_MD #define PY_EVP_MD_fetch(algorithm, properties) EVP_MD_fetch(NULL, algorithm, properties) #define PY_EVP_MD_up_ref(md) EVP_MD_up_ref(md) @@ -77,12 +81,12 @@ * py_alias as keys. */ -enum Py_hash_type { - Py_ht_evp, // usedforsecurity=True / default - Py_ht_evp_nosecurity, // usedforsecurity=False - Py_ht_mac, // HMAC - Py_ht_pbkdf2, // PKBDF2 -}; +typedef enum Py_hash_type { + Py_ht_evp, // usedforsecurity=True / default + Py_ht_evp_nosecurity, // usedforsecurity=False + Py_ht_mac, // HMAC + Py_ht_pbkdf2, // PKBDF2 +} Py_hash_type; typedef struct { const char *py_name; @@ -255,10 +259,10 @@ py_hashentry_table_new(void) { static PyModuleDef _hashlibmodule; typedef struct { - PyTypeObject *EVPtype; + PyTypeObject *HASH_type; // based on EVP_MD PyTypeObject *HMACtype; #ifdef PY_OPENSSL_HAS_SHAKE - PyTypeObject *EVPXOFtype; + PyTypeObject *HASHXOF_type; // based on EVP_MD #endif PyObject *constructs; PyObject *unsupported_digestmod_error; @@ -275,13 +279,13 @@ get_hashlib_state(PyObject *module) typedef struct { PyObject_HEAD - EVP_MD_CTX *ctx; /* OpenSSL message digest context */ + EVP_MD_CTX *ctx; /* OpenSSL message digest context */ // Prevents undefined behavior via multiple threads entering the C API. bool use_mutex; - PyMutex mutex; /* OpenSSL context lock */ -} EVPobject; + PyMutex mutex; /* OpenSSL context lock */ +} HASHobject; -#define EVPobject_CAST(op) ((EVPobject *)(op)) +#define HASHobject_CAST(op) ((HASHobject *)(op)) typedef struct { PyObject_HEAD @@ -296,11 +300,11 @@ typedef struct { #include "clinic/_hashopenssl.c.h" /*[clinic input] module _hashlib -class _hashlib.HASH "EVPobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPtype" -class _hashlib.HASHXOF "EVPobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPXOFtype" +class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPtype" +class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPXOFtype" class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMACtype" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7df1bcf6f75cb8ef]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4f6b8873ed13d1ff]*/ /* LCOV_EXCL_START */ @@ -364,8 +368,8 @@ notify_ssl_error_occurred(void) } /* LCOV_EXCL_STOP */ -static PyObject* -py_digest_name(const EVP_MD *md) +static const char * +get_openssl_evp_md_utf8name(const EVP_MD *md) { assert(md != NULL); int nid = EVP_MD_nid(md); @@ -388,13 +392,20 @@ py_digest_name(const EVP_MD *md) if (name == NULL) name = OBJ_nid2sn(nid); } + return name; +} +static PyObject * +get_openssl_evp_md_name(const EVP_MD *md) +{ + const char *name = get_openssl_evp_md_utf8name(md); return PyUnicode_FromString(name); } /* Get EVP_MD by HID and purpose */ -static PY_EVP_MD* -py_digest_by_name(PyObject *module, const char *name, enum Py_hash_type py_ht) +static PY_EVP_MD * +get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, + Py_hash_type py_ht) { PY_EVP_MD *digest = NULL; PY_EVP_MD *other_digest = NULL; @@ -460,15 +471,17 @@ py_digest_by_name(PyObject *module, const char *name, enum Py_hash_type py_ht) return digest; } -/* Get digest EVP from object +/* Get digest EVP_MD from object * * * string * * _hashopenssl builtin function * * on error returns NULL with exception set. */ -static PY_EVP_MD* -py_digest_by_digestmod(PyObject *module, PyObject *digestmod, enum Py_hash_type py_ht) { +static PY_EVP_MD * +get_openssl_evp_md(PyObject *module, PyObject *digestmod, + Py_hash_type py_ht) +{ PyObject *name_obj = NULL; const char *name; @@ -494,13 +507,13 @@ py_digest_by_digestmod(PyObject *module, PyObject *digestmod, enum Py_hash_type return NULL; } - return py_digest_by_name(module, name, py_ht); + return get_openssl_evp_md_by_utf8name(module, name, py_ht); } -static EVPobject * -newEVPobject(PyTypeObject *type) +static HASHobject * +new_hash_object(PyTypeObject *type) { - EVPobject *retval = PyObject_New(EVPobject, type); + HASHobject *retval = PyObject_New(HASHobject, type); if (retval == NULL) { return NULL; } @@ -517,7 +530,7 @@ newEVPobject(PyTypeObject *type) } static int -EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len) +_hashlib_HASH_hash(HASHobject *self, const void *vp, Py_ssize_t len) { unsigned int process; const unsigned char *cp = (const unsigned char *)vp; @@ -539,9 +552,9 @@ EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len) /* Internal methods for a hash object */ static void -EVP_dealloc(PyObject *op) +_hashlib_HASH_dealloc(PyObject *op) { - EVPobject *self = EVPobject_CAST(op); + HASHobject *self = HASHobject_CAST(op); PyTypeObject *tp = Py_TYPE(self); EVP_MD_CTX_free(self->ctx); PyObject_Free(self); @@ -549,7 +562,7 @@ EVP_dealloc(PyObject *op) } static int -locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self) +_hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p) { int result; ENTER_HASHLIB(self); @@ -561,21 +574,21 @@ locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self) /* External methods for a hash object */ /*[clinic input] -_hashlib.HASH.copy as EVP_copy +_hashlib.HASH.copy Return a copy of the hash object. [clinic start generated code]*/ static PyObject * -EVP_copy_impl(EVPobject *self) -/*[clinic end generated code: output=b370c21cdb8ca0b4 input=31455b6a3e638069]*/ +_hashlib_HASH_copy_impl(HASHobject *self) +/*[clinic end generated code: output=2545541af18d53d7 input=814b19202cd08a26]*/ { - EVPobject *newobj; + HASHobject *newobj; - if ((newobj = newEVPobject(Py_TYPE(self))) == NULL) + if ((newobj = new_hash_object(Py_TYPE(self))) == NULL) return NULL; - if (!locked_EVP_MD_CTX_copy(newobj->ctx, self)) { + if (!_hashlib_HASH_copy_locked(self, newobj->ctx)) { Py_DECREF(newobj); notify_ssl_error_occurred(); return NULL; @@ -584,14 +597,14 @@ EVP_copy_impl(EVPobject *self) } /*[clinic input] -_hashlib.HASH.digest as EVP_digest +_hashlib.HASH.digest Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * -EVP_digest_impl(EVPobject *self) -/*[clinic end generated code: output=0f6a3a0da46dc12d input=03561809a419bf00]*/ +_hashlib_HASH_digest_impl(HASHobject *self) +/*[clinic end generated code: output=3fc6f9671d712850 input=d8d528d6e50af0de]*/ { unsigned char digest[EVP_MAX_MD_SIZE]; EVP_MD_CTX *temp_ctx; @@ -604,7 +617,7 @@ EVP_digest_impl(EVPobject *self) return NULL; } - if (!locked_EVP_MD_CTX_copy(temp_ctx, self)) { + if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { goto error; } digest_size = EVP_MD_CTX_size(temp_ctx); @@ -623,14 +636,14 @@ EVP_digest_impl(EVPobject *self) } /*[clinic input] -_hashlib.HASH.hexdigest as EVP_hexdigest +_hashlib.HASH.hexdigest Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * -EVP_hexdigest_impl(EVPobject *self) -/*[clinic end generated code: output=18e6decbaf197296 input=aff9cf0e4c741a9a]*/ +_hashlib_HASH_hexdigest_impl(HASHobject *self) +/*[clinic end generated code: output=1b8e60d9711e7f4d input=ae7553f78f8372d8]*/ { unsigned char digest[EVP_MAX_MD_SIZE]; EVP_MD_CTX *temp_ctx; @@ -643,7 +656,7 @@ EVP_hexdigest_impl(EVPobject *self) } /* Get the raw (binary) digest value */ - if (!locked_EVP_MD_CTX_copy(temp_ctx, self)) { + if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { goto error; } digest_size = EVP_MD_CTX_size(temp_ctx); @@ -662,7 +675,7 @@ EVP_hexdigest_impl(EVPobject *self) } /*[clinic input] -_hashlib.HASH.update as EVP_update +_hashlib.HASH.update obj: object / @@ -671,8 +684,8 @@ Update this hash object's state with the provided string. [clinic start generated code]*/ static PyObject * -EVP_update_impl(EVPobject *self, PyObject *obj) -/*[clinic end generated code: output=d56f91c68348f95f input=9b30ec848f015501]*/ +_hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) +/*[clinic end generated code: output=62ad989754946b86 input=aa1ce20e3f92ceb6]*/ { int result; Py_buffer view; @@ -685,11 +698,11 @@ EVP_update_impl(EVPobject *self, PyObject *obj) if (self->use_mutex) { Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); - result = EVP_hash(self, view.buf, view.len); + result = _hashlib_HASH_hash(self, view.buf, view.len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS } else { - result = EVP_hash(self, view.buf, view.len); + result = _hashlib_HASH_hash(self, view.buf, view.len); } PyBuffer_Release(&view); @@ -699,54 +712,54 @@ EVP_update_impl(EVPobject *self, PyObject *obj) Py_RETURN_NONE; } -static PyMethodDef EVP_methods[] = { - EVP_UPDATE_METHODDEF - EVP_DIGEST_METHODDEF - EVP_HEXDIGEST_METHODDEF - EVP_COPY_METHODDEF +static PyMethodDef HASH_methods[] = { + _HASHLIB_HASH_COPY_METHODDEF + _HASHLIB_HASH_DIGEST_METHODDEF + _HASHLIB_HASH_HEXDIGEST_METHODDEF + _HASHLIB_HASH_UPDATE_METHODDEF {NULL, NULL} /* sentinel */ }; static PyObject * -EVP_get_block_size(PyObject *op, void *Py_UNUSED(closure)) +_hashlib_HASH_get_blocksize(PyObject *op, void *Py_UNUSED(closure)) { - EVPobject *self = EVPobject_CAST(op); + HASHobject *self = HASHobject_CAST(op); long block_size = EVP_MD_CTX_block_size(self->ctx); return PyLong_FromLong(block_size); } static PyObject * -EVP_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) +_hashlib_HASH_get_digestsize(PyObject *op, void *Py_UNUSED(closure)) { - EVPobject *self = EVPobject_CAST(op); + HASHobject *self = HASHobject_CAST(op); long size = EVP_MD_CTX_size(self->ctx); return PyLong_FromLong(size); } static PyObject * -EVP_get_name(PyObject *op, void *Py_UNUSED(closure)) +_hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure)) { - EVPobject *self = EVPobject_CAST(op); + HASHobject *self = HASHobject_CAST(op); const EVP_MD *md = EVP_MD_CTX_md(self->ctx); if (md == NULL) { notify_ssl_error_occurred(); return NULL; } - return py_digest_name(md); + return get_openssl_evp_md_name(md); } -static PyGetSetDef EVP_getseters[] = { - {"digest_size", EVP_get_digest_size, NULL, NULL, NULL}, - {"block_size", EVP_get_block_size, NULL, NULL, NULL}, - {"name", EVP_get_name, NULL, NULL, PyDoc_STR("algorithm name.")}, +static PyGetSetDef HASH_getsets[] = { + {"digest_size", _hashlib_HASH_get_digestsize, NULL, NULL, NULL}, + {"block_size", _hashlib_HASH_get_blocksize, NULL, NULL, NULL}, + {"name", _hashlib_HASH_get_name, NULL, NULL, PyDoc_STR("algorithm name.")}, {NULL} /* Sentinel */ }; static PyObject * -EVP_repr(PyObject *self) +_hashlib_HASH_repr(PyObject *self) { - PyObject *name = EVP_get_name(self, NULL); + PyObject *name = _hashlib_HASH_get_name(self, NULL); if (name == NULL) { return NULL; } @@ -756,7 +769,7 @@ EVP_repr(PyObject *self) return repr; } -PyDoc_STRVAR(hashtype_doc, +PyDoc_STRVAR(HASHobject_type_doc, "HASH(name, string=b\'\')\n" "--\n" "\n" @@ -774,27 +787,31 @@ PyDoc_STRVAR(hashtype_doc, "name -- the hash algorithm being used by this object\n" "digest_size -- number of bytes in this hashes output"); -static PyType_Slot EVPtype_slots[] = { - {Py_tp_dealloc, EVP_dealloc}, - {Py_tp_repr, EVP_repr}, - {Py_tp_doc, (char *)hashtype_doc}, - {Py_tp_methods, EVP_methods}, - {Py_tp_getset, EVP_getseters}, +static PyType_Slot HASHobject_type_slots[] = { + {Py_tp_dealloc, _hashlib_HASH_dealloc}, + {Py_tp_repr, _hashlib_HASH_repr}, + {Py_tp_doc, (char *)HASHobject_type_doc}, + {Py_tp_methods, HASH_methods}, + {Py_tp_getset, HASH_getsets}, {0, 0}, }; -static PyType_Spec EVPtype_spec = { - "_hashlib.HASH", /*tp_name*/ - sizeof(EVPobject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE, - EVPtype_slots +static PyType_Spec HASHobject_type_spec = { + .name = "_hashlib.HASH", + .basicsize = sizeof(HASHobject), + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_DISALLOW_INSTANTIATION + | Py_TPFLAGS_IMMUTABLETYPE + ), + .slots = HASHobject_type_slots }; #ifdef PY_OPENSSL_HAS_SHAKE /*[clinic input] -_hashlib.HASHXOF.digest as EVPXOF_digest +_hashlib.HASHXOF.digest length: Py_ssize_t @@ -802,8 +819,8 @@ Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * -EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) -/*[clinic end generated code: output=ef9320c23280efad input=816a6537cea3d1db]*/ +_hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) +/*[clinic end generated code: output=dcb09335dd2fe908 input=3eb034ce03c55b21]*/ { EVP_MD_CTX *temp_ctx; PyObject *retval = PyBytes_FromStringAndSize(NULL, length); @@ -819,7 +836,7 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) return NULL; } - if (!locked_EVP_MD_CTX_copy(temp_ctx, self)) { + if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { goto error; } if (!EVP_DigestFinalXOF(temp_ctx, @@ -840,7 +857,7 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) } /*[clinic input] -_hashlib.HASHXOF.hexdigest as EVPXOF_hexdigest +_hashlib.HASHXOF.hexdigest length: Py_ssize_t @@ -848,8 +865,8 @@ Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * -EVPXOF_hexdigest_impl(EVPobject *self, Py_ssize_t length) -/*[clinic end generated code: output=eb3e6ee7788bf5b2 input=5f9d6a8f269e34df]*/ +_hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) +/*[clinic end generated code: output=519431cafa014f39 input=0e58f7238adb7ab8]*/ { unsigned char *digest; EVP_MD_CTX *temp_ctx; @@ -869,7 +886,7 @@ EVPXOF_hexdigest_impl(EVPobject *self, Py_ssize_t length) } /* Get the raw (binary) digest value */ - if (!locked_EVP_MD_CTX_copy(temp_ctx, self)) { + if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { goto error; } if (!EVP_DigestFinalXOF(temp_ctx, digest, length)) { @@ -889,25 +906,26 @@ EVPXOF_hexdigest_impl(EVPobject *self, Py_ssize_t length) return NULL; } -static PyMethodDef EVPXOF_methods[] = { - EVPXOF_DIGEST_METHODDEF - EVPXOF_HEXDIGEST_METHODDEF +static PyMethodDef HASHXOFobject_methods[] = { + _HASHLIB_HASHXOF_DIGEST_METHODDEF + _HASHLIB_HASHXOF_HEXDIGEST_METHODDEF {NULL, NULL} /* sentinel */ }; static PyObject * -EVPXOF_get_digest_size(PyObject *Py_UNUSED(self), void *Py_UNUSED(closure)) +_hashlib_HASHXOF_digest_size(PyObject *Py_UNUSED(self), + void *Py_UNUSED(closure)) { return PyLong_FromLong(0); } -static PyGetSetDef EVPXOF_getseters[] = { - {"digest_size", EVPXOF_get_digest_size, NULL, NULL, NULL}, +static PyGetSetDef HASHXOFobject_getsets[] = { + {"digest_size", _hashlib_HASHXOF_digest_size, NULL, NULL, NULL}, {NULL} /* Sentinel */ }; -PyDoc_STRVAR(hashxoftype_doc, +PyDoc_STRVAR(HASHXOFobject_type_doc, "HASHXOF(name, string=b\'\')\n" "--\n" "\n" @@ -925,38 +943,42 @@ PyDoc_STRVAR(hashxoftype_doc, "name -- the hash algorithm being used by this object\n" "digest_size -- number of bytes in this hashes output"); -static PyType_Slot EVPXOFtype_slots[] = { - {Py_tp_doc, (char *)hashxoftype_doc}, - {Py_tp_methods, EVPXOF_methods}, - {Py_tp_getset, EVPXOF_getseters}, +static PyType_Slot HASHXOFobject_type_slots[] = { + {Py_tp_doc, (char *)HASHXOFobject_type_doc}, + {Py_tp_methods, HASHXOFobject_methods}, + {Py_tp_getset, HASHXOFobject_getsets}, {0, 0}, }; -static PyType_Spec EVPXOFtype_spec = { - "_hashlib.HASHXOF", /*tp_name*/ - sizeof(EVPobject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE, - EVPXOFtype_slots +static PyType_Spec HASHXOFobject_type_spec = { + .name = "_hashlib.HASHXOF", + .basicsize = sizeof(HASHobject), + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_BASETYPE + | Py_TPFLAGS_DISALLOW_INSTANTIATION + | Py_TPFLAGS_IMMUTABLETYPE + ), + .slots = HASHXOFobject_type_slots }; #endif -static PyObject* -py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, - int usedforsecurity) +static PyObject * +_hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, + int usedforsecurity) { Py_buffer view = { 0 }; PY_EVP_MD *digest = NULL; PyTypeObject *type; - EVPobject *self = NULL; + HASHobject *self = NULL; if (data_obj != NULL) { GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view); } - digest = py_digest_by_name( + digest = get_openssl_evp_md_by_utf8name( module, digestname, usedforsecurity ? Py_ht_evp : Py_ht_evp_nosecurity ); if (digest == NULL) { @@ -964,12 +986,12 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, } if ((EVP_MD_flags(digest) & EVP_MD_FLAG_XOF) == EVP_MD_FLAG_XOF) { - type = get_hashlib_state(module)->EVPXOFtype; + type = get_hashlib_state(module)->HASHXOF_type; } else { - type = get_hashlib_state(module)->EVPtype; + type = get_hashlib_state(module)->HASH_type; } - self = newEVPobject(type); + self = new_hash_object(type); if (self == NULL) { goto exit; } @@ -994,10 +1016,10 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, /* We do not initialize self->lock here as this is the constructor * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS - result = EVP_hash(self, view.buf, view.len); + result = _hashlib_HASH_hash(self, view.buf, view.len); Py_END_ALLOW_THREADS } else { - result = EVP_hash(self, view.buf, view.len); + result = _hashlib_HASH_hash(self, view.buf, view.len); } if (result == -1) { assert(PyErr_Occurred()); @@ -1021,9 +1043,9 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, /* The module-level function: new() */ /*[clinic input] -_hashlib.new as EVP_new +_hashlib.new as _hashlib_HASH_new - name as name_obj: object + name: str string as data_obj: object(c_default="NULL") = b'' * usedforsecurity: bool = True @@ -1037,16 +1059,11 @@ The MD5 and SHA1 algorithms are always supported. [clinic start generated code]*/ static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=ddd5053f92dffe90 input=c24554d0337be1b0]*/ +_hashlib_HASH_new_impl(PyObject *module, const char *name, + PyObject *data_obj, int usedforsecurity) +/*[clinic end generated code: output=30c6e7b9a5a4dce3 input=28848db5ccd0a9b5]*/ { - char *name; - if (!PyArg_Parse(name_obj, "s", &name)) { - PyErr_SetString(PyExc_TypeError, "name must be a string"); - return NULL; - } - return py_evp_fromname(module, name, data_obj, usedforsecurity); + return _hashlib_HASH(module, name, data_obj, usedforsecurity); } @@ -1066,7 +1083,7 @@ _hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=87b0186440a44f8c input=990e36d5e689b16e]*/ { - return py_evp_fromname(module, Py_hash_md5, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_md5, data_obj, usedforsecurity); } @@ -1086,7 +1103,7 @@ _hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=6813024cf690670d input=948f2f4b6deabc10]*/ { - return py_evp_fromname(module, Py_hash_sha1, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha1, data_obj, usedforsecurity); } @@ -1106,7 +1123,7 @@ _hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=a2dfe7cc4eb14ebb input=f9272821fadca505]*/ { - return py_evp_fromname(module, Py_hash_sha224, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha224, data_obj, usedforsecurity); } @@ -1126,7 +1143,7 @@ _hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=1f874a34870f0a68 input=549fad9d2930d4c5]*/ { - return py_evp_fromname(module, Py_hash_sha256, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha256, data_obj, usedforsecurity); } @@ -1146,7 +1163,7 @@ _hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=58529eff9ca457b2 input=48601a6e3bf14ad7]*/ { - return py_evp_fromname(module, Py_hash_sha384, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha384, data_obj, usedforsecurity); } @@ -1166,7 +1183,7 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=2c744c9e4a40d5f6 input=c5c46a2a817aa98f]*/ { - return py_evp_fromname(module, Py_hash_sha512, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha512, data_obj, usedforsecurity); } @@ -1188,7 +1205,7 @@ _hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=144641c1d144b974 input=e3a01b2888916157]*/ { - return py_evp_fromname(module, Py_hash_sha3_224, data_obj, usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha3_224, data_obj, usedforsecurity); } /*[clinic input] @@ -1207,7 +1224,7 @@ _hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=c61f1ab772d06668 input=e2908126c1b6deed]*/ { - return py_evp_fromname(module, Py_hash_sha3_256, data_obj , usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha3_256, data_obj, usedforsecurity); } /*[clinic input] @@ -1226,7 +1243,7 @@ _hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=f68e4846858cf0ee input=ec0edf5c792f8252]*/ { - return py_evp_fromname(module, Py_hash_sha3_384, data_obj , usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha3_384, data_obj, usedforsecurity); } /*[clinic input] @@ -1245,7 +1262,7 @@ _hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=2eede478c159354a input=64e2cc0c094d56f4]*/ { - return py_evp_fromname(module, Py_hash_sha3_512, data_obj , usedforsecurity); + return _hashlib_HASH(module, Py_hash_sha3_512, data_obj, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHA3 */ @@ -1266,7 +1283,7 @@ _hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=bc49cdd8ada1fa97 input=6c9d67440eb33ec8]*/ { - return py_evp_fromname(module, Py_hash_shake_128, data_obj , usedforsecurity); + return _hashlib_HASH(module, Py_hash_shake_128, data_obj, usedforsecurity); } /*[clinic input] @@ -1285,7 +1302,7 @@ _hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, int usedforsecurity) /*[clinic end generated code: output=358d213be8852df7 input=479cbe9fefd4a9f8]*/ { - return py_evp_fromname(module, Py_hash_shake_256, data_obj , usedforsecurity); + return _hashlib_HASH(module, Py_hash_shake_256, data_obj, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHAKE */ @@ -1312,7 +1329,7 @@ pbkdf2_hmac_impl(PyObject *module, const char *hash_name, long dklen; int retval; - PY_EVP_MD *digest = py_digest_by_name(module, hash_name, Py_ht_pbkdf2); + PY_EVP_MD *digest = get_openssl_evp_md_by_utf8name(module, hash_name, Py_ht_pbkdf2); if (digest == NULL) { goto end; } @@ -1514,7 +1531,7 @@ _hashlib_hmac_singleshot_impl(PyObject *module, Py_buffer *key, return NULL; } - evp = py_digest_by_digestmod(module, digest, Py_ht_mac); + evp = get_openssl_evp_md(module, digest, Py_ht_mac); if (evp == NULL) { return NULL; } @@ -1583,7 +1600,7 @@ _hashlib_hmac_new_impl(PyObject *module, Py_buffer *key, PyObject *msg_obj, return NULL; } - digest = py_digest_by_digestmod(module, digestmod, Py_ht_mac); + digest = get_openssl_evp_md(module, digestmod, Py_ht_mac); if (digest == NULL) { return NULL; } @@ -1740,7 +1757,7 @@ _hmac_repr(PyObject *op) if (md == NULL) { return NULL; } - PyObject *digest_name = py_digest_name(md); + PyObject *digest_name = get_openssl_evp_md_name(md); if (digest_name == NULL) { return NULL; } @@ -1860,7 +1877,7 @@ _hashlib_hmac_get_name(PyObject *op, void *Py_UNUSED(closure)) if (md == NULL) { return NULL; } - PyObject *digest_name = py_digest_name(md); + PyObject *digest_name = get_openssl_evp_md_name(md); if (digest_name == NULL) { return NULL; } @@ -1926,7 +1943,7 @@ typedef struct _internal_name_mapper_state { /* A callback function to pass to OpenSSL's OBJ_NAME_do_all(...) */ static void -#if OPENSSL_VERSION_NUMBER >= 0x30000000L +#ifdef Py_HAS_OPENSSL3_SUPPORT _openssl_hash_name_mapper(EVP_MD *md, void *arg) #else _openssl_hash_name_mapper(const EVP_MD *md, const char *from, @@ -1942,7 +1959,7 @@ _openssl_hash_name_mapper(const EVP_MD *md, const char *from, return; } - py_name = py_digest_name(md); + py_name = get_openssl_evp_md_name(md); if (py_name == NULL) { state->error = 1; } else { @@ -1966,7 +1983,7 @@ hashlib_md_meth_names(PyObject *module) return -1; } -#if OPENSSL_VERSION_NUMBER >= 0x30000000L +#ifdef Py_HAS_OPENSSL3_SUPPORT // get algorithms from all activated providers in default context EVP_MD_do_all_provided(NULL, &_openssl_hash_name_mapper, &state); #else @@ -1999,7 +2016,7 @@ _hashlib_get_fips_mode_impl(PyObject *module) /*[clinic end generated code: output=87eece1bab4d3fa9 input=2db61538c41c6fef]*/ { -#if OPENSSL_VERSION_NUMBER >= 0x30000000L +#ifdef Py_HAS_OPENSSL3_SUPPORT return EVP_default_properties_is_fips_enabled(NULL); #else ERR_clear_error(); @@ -2134,7 +2151,7 @@ _hashlib_compare_digest_impl(PyObject *module, PyObject *a, PyObject *b) /* List of functions exported by this module */ static struct PyMethodDef EVP_functions[] = { - EVP_NEW_METHODDEF + _HASHLIB_HASH_NEW_METHODDEF PBKDF2_HMAC_METHODDEF _HASHLIB_SCRYPT_METHODDEF _HASHLIB_GET_FIPS_MODE_METHODDEF @@ -2163,10 +2180,10 @@ static int hashlib_traverse(PyObject *m, visitproc visit, void *arg) { _hashlibstate *state = get_hashlib_state(m); - Py_VISIT(state->EVPtype); + Py_VISIT(state->HASH_type); Py_VISIT(state->HMACtype); #ifdef PY_OPENSSL_HAS_SHAKE - Py_VISIT(state->EVPXOFtype); + Py_VISIT(state->HASHXOF_type); #endif Py_VISIT(state->constructs); Py_VISIT(state->unsupported_digestmod_error); @@ -2177,10 +2194,10 @@ static int hashlib_clear(PyObject *m) { _hashlibstate *state = get_hashlib_state(m); - Py_CLEAR(state->EVPtype); + Py_CLEAR(state->HASH_type); Py_CLEAR(state->HMACtype); #ifdef PY_OPENSSL_HAS_SHAKE - Py_CLEAR(state->EVPXOFtype); + Py_CLEAR(state->HASHXOF_type); #endif Py_CLEAR(state->constructs); Py_CLEAR(state->unsupported_digestmod_error); @@ -2214,37 +2231,37 @@ hashlib_init_hashtable(PyObject *module) } static int -hashlib_init_evptype(PyObject *module) +hashlib_init_HASH_type(PyObject *module) { _hashlibstate *state = get_hashlib_state(module); - state->EVPtype = (PyTypeObject *)PyType_FromSpec(&EVPtype_spec); - if (state->EVPtype == NULL) { + state->HASH_type = (PyTypeObject *)PyType_FromSpec(&HASHobject_type_spec); + if (state->HASH_type == NULL) { return -1; } - if (PyModule_AddType(module, state->EVPtype) < 0) { + if (PyModule_AddType(module, state->HASH_type) < 0) { return -1; } return 0; } static int -hashlib_init_evpxoftype(PyObject *module) +hashlib_init_HASHXOF_type(PyObject *module) { #ifdef PY_OPENSSL_HAS_SHAKE _hashlibstate *state = get_hashlib_state(module); - if (state->EVPtype == NULL) { + if (state->HASH_type == NULL) { return -1; } - state->EVPXOFtype = (PyTypeObject *)PyType_FromSpecWithBases( - &EVPXOFtype_spec, (PyObject *)state->EVPtype + state->HASHXOF_type = (PyTypeObject *)PyType_FromSpecWithBases( + &HASHXOFobject_type_spec, (PyObject *)state->HASH_type ); - if (state->EVPXOFtype == NULL) { + if (state->HASHXOF_type == NULL) { return -1; } - if (PyModule_AddType(module, state->EVPXOFtype) < 0) { + if (PyModule_AddType(module, state->HASHXOF_type) < 0) { return -1; } #endif @@ -2341,8 +2358,8 @@ hashlib_constants(PyObject *module) static PyModuleDef_Slot hashlib_slots[] = { {Py_mod_exec, hashlib_init_hashtable}, - {Py_mod_exec, hashlib_init_evptype}, - {Py_mod_exec, hashlib_init_evpxoftype}, + {Py_mod_exec, hashlib_init_HASH_type}, + {Py_mod_exec, hashlib_init_HASHXOF_type}, {Py_mod_exec, hashlib_init_hmactype}, {Py_mod_exec, hashlib_md_meth_names}, {Py_mod_exec, hashlib_init_constructors}, diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index 59ab46ca3f0978..b2f6b25a235e68 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -10,98 +10,98 @@ preserve #include "pycore_long.h" // _PyLong_UnsignedLong_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(EVP_copy__doc__, +PyDoc_STRVAR(_hashlib_HASH_copy__doc__, "copy($self, /)\n" "--\n" "\n" "Return a copy of the hash object."); -#define EVP_COPY_METHODDEF \ - {"copy", (PyCFunction)EVP_copy, METH_NOARGS, EVP_copy__doc__}, +#define _HASHLIB_HASH_COPY_METHODDEF \ + {"copy", (PyCFunction)_hashlib_HASH_copy, METH_NOARGS, _hashlib_HASH_copy__doc__}, static PyObject * -EVP_copy_impl(EVPobject *self); +_hashlib_HASH_copy_impl(HASHobject *self); static PyObject * -EVP_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) +_hashlib_HASH_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return EVP_copy_impl((EVPobject *)self); + return _hashlib_HASH_copy_impl((HASHobject *)self); } -PyDoc_STRVAR(EVP_digest__doc__, +PyDoc_STRVAR(_hashlib_HASH_digest__doc__, "digest($self, /)\n" "--\n" "\n" "Return the digest value as a bytes object."); -#define EVP_DIGEST_METHODDEF \ - {"digest", (PyCFunction)EVP_digest, METH_NOARGS, EVP_digest__doc__}, +#define _HASHLIB_HASH_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_hashlib_HASH_digest, METH_NOARGS, _hashlib_HASH_digest__doc__}, static PyObject * -EVP_digest_impl(EVPobject *self); +_hashlib_HASH_digest_impl(HASHobject *self); static PyObject * -EVP_digest(PyObject *self, PyObject *Py_UNUSED(ignored)) +_hashlib_HASH_digest(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return EVP_digest_impl((EVPobject *)self); + return _hashlib_HASH_digest_impl((HASHobject *)self); } -PyDoc_STRVAR(EVP_hexdigest__doc__, +PyDoc_STRVAR(_hashlib_HASH_hexdigest__doc__, "hexdigest($self, /)\n" "--\n" "\n" "Return the digest value as a string of hexadecimal digits."); -#define EVP_HEXDIGEST_METHODDEF \ - {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS, EVP_hexdigest__doc__}, +#define _HASHLIB_HASH_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_hashlib_HASH_hexdigest, METH_NOARGS, _hashlib_HASH_hexdigest__doc__}, static PyObject * -EVP_hexdigest_impl(EVPobject *self); +_hashlib_HASH_hexdigest_impl(HASHobject *self); static PyObject * -EVP_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) +_hashlib_HASH_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return EVP_hexdigest_impl((EVPobject *)self); + return _hashlib_HASH_hexdigest_impl((HASHobject *)self); } -PyDoc_STRVAR(EVP_update__doc__, +PyDoc_STRVAR(_hashlib_HASH_update__doc__, "update($self, obj, /)\n" "--\n" "\n" "Update this hash object\'s state with the provided string."); -#define EVP_UPDATE_METHODDEF \ - {"update", (PyCFunction)EVP_update, METH_O, EVP_update__doc__}, +#define _HASHLIB_HASH_UPDATE_METHODDEF \ + {"update", (PyCFunction)_hashlib_HASH_update, METH_O, _hashlib_HASH_update__doc__}, static PyObject * -EVP_update_impl(EVPobject *self, PyObject *obj); +_hashlib_HASH_update_impl(HASHobject *self, PyObject *obj); static PyObject * -EVP_update(PyObject *self, PyObject *obj) +_hashlib_HASH_update(PyObject *self, PyObject *obj) { PyObject *return_value = NULL; - return_value = EVP_update_impl((EVPobject *)self, obj); + return_value = _hashlib_HASH_update_impl((HASHobject *)self, obj); return return_value; } #if defined(PY_OPENSSL_HAS_SHAKE) -PyDoc_STRVAR(EVPXOF_digest__doc__, +PyDoc_STRVAR(_hashlib_HASHXOF_digest__doc__, "digest($self, /, length)\n" "--\n" "\n" "Return the digest value as a bytes object."); -#define EVPXOF_DIGEST_METHODDEF \ - {"digest", _PyCFunction_CAST(EVPXOF_digest), METH_FASTCALL|METH_KEYWORDS, EVPXOF_digest__doc__}, +#define _HASHLIB_HASHXOF_DIGEST_METHODDEF \ + {"digest", _PyCFunction_CAST(_hashlib_HASHXOF_digest), METH_FASTCALL|METH_KEYWORDS, _hashlib_HASHXOF_digest__doc__}, static PyObject * -EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length); +_hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length); static PyObject * -EVPXOF_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_hashlib_HASHXOF_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -151,7 +151,7 @@ EVPXOF_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject } length = ival; } - return_value = EVPXOF_digest_impl((EVPobject *)self, length); + return_value = _hashlib_HASHXOF_digest_impl((HASHobject *)self, length); exit: return return_value; @@ -161,20 +161,20 @@ EVPXOF_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject #if defined(PY_OPENSSL_HAS_SHAKE) -PyDoc_STRVAR(EVPXOF_hexdigest__doc__, +PyDoc_STRVAR(_hashlib_HASHXOF_hexdigest__doc__, "hexdigest($self, /, length)\n" "--\n" "\n" "Return the digest value as a string of hexadecimal digits."); -#define EVPXOF_HEXDIGEST_METHODDEF \ - {"hexdigest", _PyCFunction_CAST(EVPXOF_hexdigest), METH_FASTCALL|METH_KEYWORDS, EVPXOF_hexdigest__doc__}, +#define _HASHLIB_HASHXOF_HEXDIGEST_METHODDEF \ + {"hexdigest", _PyCFunction_CAST(_hashlib_HASHXOF_hexdigest), METH_FASTCALL|METH_KEYWORDS, _hashlib_HASHXOF_hexdigest__doc__}, static PyObject * -EVPXOF_hexdigest_impl(EVPobject *self, Py_ssize_t length); +_hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length); static PyObject * -EVPXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_hashlib_HASHXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -224,7 +224,7 @@ EVPXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje } length = ival; } - return_value = EVPXOF_hexdigest_impl((EVPobject *)self, length); + return_value = _hashlib_HASHXOF_hexdigest_impl((HASHobject *)self, length); exit: return return_value; @@ -232,7 +232,7 @@ EVPXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje #endif /* defined(PY_OPENSSL_HAS_SHAKE) */ -PyDoc_STRVAR(EVP_new__doc__, +PyDoc_STRVAR(_hashlib_HASH_new__doc__, "new($module, /, name, string=b\'\', *, usedforsecurity=True)\n" "--\n" "\n" @@ -243,15 +243,15 @@ PyDoc_STRVAR(EVP_new__doc__, "\n" "The MD5 and SHA1 algorithms are always supported."); -#define EVP_NEW_METHODDEF \ - {"new", _PyCFunction_CAST(EVP_new), METH_FASTCALL|METH_KEYWORDS, EVP_new__doc__}, +#define _HASHLIB_HASH_NEW_METHODDEF \ + {"new", _PyCFunction_CAST(_hashlib_HASH_new), METH_FASTCALL|METH_KEYWORDS, _hashlib_HASH_new__doc__}, static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity); +_hashlib_HASH_new_impl(PyObject *module, const char *name, + PyObject *data_obj, int usedforsecurity); static PyObject * -EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -283,7 +283,7 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn #undef KWTUPLE PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *name_obj; + const char *name; PyObject *data_obj = NULL; int usedforsecurity = 1; @@ -292,7 +292,19 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn if (!args) { goto exit; } - name_obj = args[0]; + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("new", "argument 'name'", "str", args[0]); + goto exit; + } + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } @@ -311,7 +323,7 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn goto exit; } skip_optional_kwonly: - return_value = EVP_new_impl(module, name_obj, data_obj, usedforsecurity); + return_value = _hashlib_HASH_new_impl(module, name, data_obj, usedforsecurity); exit: return return_value; @@ -1836,13 +1848,13 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg return return_value; } -#ifndef EVPXOF_DIGEST_METHODDEF - #define EVPXOF_DIGEST_METHODDEF -#endif /* !defined(EVPXOF_DIGEST_METHODDEF) */ +#ifndef _HASHLIB_HASHXOF_DIGEST_METHODDEF + #define _HASHLIB_HASHXOF_DIGEST_METHODDEF +#endif /* !defined(_HASHLIB_HASHXOF_DIGEST_METHODDEF) */ -#ifndef EVPXOF_HEXDIGEST_METHODDEF - #define EVPXOF_HEXDIGEST_METHODDEF -#endif /* !defined(EVPXOF_HEXDIGEST_METHODDEF) */ +#ifndef _HASHLIB_HASHXOF_HEXDIGEST_METHODDEF + #define _HASHLIB_HASHXOF_HEXDIGEST_METHODDEF +#endif /* !defined(_HASHLIB_HASHXOF_HEXDIGEST_METHODDEF) */ #ifndef _HASHLIB_OPENSSL_SHA3_224_METHODDEF #define _HASHLIB_OPENSSL_SHA3_224_METHODDEF @@ -1871,4 +1883,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=2c78822e38be64a8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=dc03b64435166a64 input=a9049054013a1b77]*/ From 9eb84d83e00070cec3cfe78f1d0c7a7a0fbef30f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 May 2025 12:38:16 +0200 Subject: [PATCH 351/989] gh-132710: add missing NEWS entry for GH-132901 (#134705) --- .../Library/2025-05-26-12-31-08.gh-issue-132710.ApU3TZ.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-26-12-31-08.gh-issue-132710.ApU3TZ.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-26-12-31-08.gh-issue-132710.ApU3TZ.rst b/Misc/NEWS.d/next/Library/2025-05-26-12-31-08.gh-issue-132710.ApU3TZ.rst new file mode 100644 index 00000000000000..b7011517aa9db8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-26-12-31-08.gh-issue-132710.ApU3TZ.rst @@ -0,0 +1,3 @@ +If possible, ensure that :func:`uuid.getnode` returns the same result even +across different processes. Previously, the result was constant only within +the same process. Patch by Bénédikt Tran. From 965662ee4a986605b60da470d9e7c1e9a6f922b3 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Mon, 26 May 2025 20:44:41 +0800 Subject: [PATCH 352/989] gh-91048: Correct Apple platform includes for iOS. (#134712) Correct Apple platform includes for iOS. --- Python/remote_debug.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/remote_debug.h b/Python/remote_debug.h index dbc6bdd09a693f..6566979bcd0e3e 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -35,7 +35,7 @@ extern "C" { # include #endif -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX # include # include # include @@ -100,7 +100,7 @@ typedef struct page_cache_entry { // Define a platform-independent process handle structure typedef struct { pid_t pid; -#if defined(__APPLE__) +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX mach_port_t task; #elif defined(MS_WINDOWS) HANDLE hProcess; @@ -127,7 +127,7 @@ _Py_RemoteDebug_ClearCache(proc_handle_t *handle) } } -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX static mach_port_t pid_to_task(pid_t pid); #endif @@ -135,7 +135,7 @@ static mach_port_t pid_to_task(pid_t pid); static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; -#if defined(__APPLE__) +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX handle->task = pid_to_task(handle->pid); #elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( @@ -167,7 +167,7 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { _Py_RemoteDebug_FreePageCache(handle); } -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX static uintptr_t return_section_address64( @@ -481,7 +481,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s return 0; } -#endif // (__APPLE__ && TARGET_OS_OSX) +#endif // (__APPLE__ && defined(TARGET_OS_OSX) && TARGET_OS_OSX) #if defined(__linux__) && HAVE_PROCESS_VM_READV static uintptr_t @@ -759,7 +759,7 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); _PyErr_ChainExceptions1(exc); } -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX // On macOS, try libpython first, then fall back to python address = search_map_for_section(handle, "PyRuntime", "libpython"); if (address == 0) { @@ -810,7 +810,7 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address result += read_bytes; } while ((size_t)read_bytes != local[0].iov_len); return 0; -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( handle->task, From 0909d6d8e8983ca286ca623f3f2f8a69cf40ebf6 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 26 May 2025 15:31:47 +0100 Subject: [PATCH 353/989] gh-91048: Add better error messages for remote debugging for CI builds (#134682) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Lib/test/test_external_inspection.py | 6 +- Modules/_remote_debugging_module.c | 356 +++++++++++++++--- Modules/clinic/_remote_debugging_module.c.h | 33 +- Python/remote_debug.h | 267 ++++++++++--- 8 files changed, 540 insertions(+), 129 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 356bcaa7c350a1..e118b86db50754 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -892,6 +892,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(debug)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(default)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index aebe798031ce4f..36f3d23d095d59 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -383,6 +383,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(data) STRUCT_FOR_ID(database) STRUCT_FOR_ID(day) + STRUCT_FOR_ID(debug) STRUCT_FOR_ID(decode) STRUCT_FOR_ID(decoder) STRUCT_FOR_ID(default) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 0fa1fa5af99a92..d172cc1485d426 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -890,6 +890,7 @@ extern "C" { INIT_ID(data), \ INIT_ID(database), \ INIT_ID(day), \ + INIT_ID(debug), \ INIT_ID(decode), \ INIT_ID(decoder), \ INIT_ID(default), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 4982c4532afd89..0a9be4e41ace89 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1320,6 +1320,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(debug); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(decode); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 291c419066ac5b..2b4b63a030b1af 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -34,17 +34,17 @@ def _make_test_script(script_dir, script_basename, source): def get_stack_trace(pid): - unwinder = RemoteUnwinder(pid, all_threads=True) + unwinder = RemoteUnwinder(pid, all_threads=True, debug=True) return unwinder.get_stack_trace() def get_async_stack_trace(pid): - unwinder = RemoteUnwinder(pid) + unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_async_stack_trace() def get_all_awaited_by(pid): - unwinder = RemoteUnwinder(pid) + unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_all_awaited_by() diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index a13cbd63ad3bd8..6c824f9d8d22f4 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -108,6 +108,7 @@ typedef struct { uintptr_t tstate_addr; uint64_t code_object_generation; _Py_hashtable_t *code_object_cache; + int debug; #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking uint32_t tlbc_generation; // Track TLBC index pool changes @@ -194,6 +195,11 @@ static int parse_frame_object( * UTILITY FUNCTIONS AND HELPERS * ============================================================================ */ +#define set_exception_cause(unwinder, exc_type, message) \ + if (unwinder->debug) { \ + _set_debug_exception_cause(exc_type, message); \ + } + static void cached_code_metadata_destroy(void *ptr) { @@ -218,13 +224,70 @@ RemoteDebugging_InitState(RemoteDebuggingState *st) return 0; } -// Helper to chain exceptions and avoid repetitions -static void -chain_exceptions(PyObject *exception, const char *string) +static int +is_prerelease_version(uint64_t version) { - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(exception, string); - _PyErr_ChainExceptions1(exc); + return (version & 0xF0) != 0xF0; +} + +static inline int +validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) +{ + if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { + // The remote is probably running a Python version predating debug offsets. + PyErr_SetString( + PyExc_RuntimeError, + "Can't determine the Python version of the remote process"); + return -1; + } + + // Assume debug offsets could change from one pre-release version to another, + // or one minor version to another, but are stable across patch versions. + if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach from a pre-release Python interpreter" + " to a process running a different Python version"); + return -1; + } + + if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach to a pre-release Python interpreter" + " from a process running a different Python version"); + return -1; + } + + unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; + unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; + + if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { + PyErr_Format( + PyExc_RuntimeError, + "Can't attach from a Python %d.%d process to a Python %d.%d process", + PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); + return -1; + } + + // The debug offsets differ between free threaded and non-free threaded builds. + if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach from a free-threaded Python process" + " to a process running a non-free-threaded version"); + return -1; + } + + if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach to a free-threaded Python process" + " from a process running a non-free-threaded version"); + return -1; + } + + return 0; } /* ============================================================================ @@ -232,29 +295,32 @@ chain_exceptions(PyObject *exception, const char *string) * ============================================================================ */ static inline int -read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +read_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) { - int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(void*), ptr_addr); if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read pointer from remote memory"); return -1; } return 0; } static inline int -read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) +read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size) { - int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(Py_ssize_t), size); if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Py_ssize_t from remote memory"); return -1; } return 0; } static int -read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) { - if (read_ptr(handle, address, ptr_addr)) { + if (read_ptr(unwinder, address, ptr_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); return -1; } *ptr_addr &= ~Py_TAG_BITS; @@ -262,10 +328,11 @@ read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) } static int -read_char(proc_handle_t *handle, uintptr_t address, char *result) +read_char(RemoteUnwinderObject *unwinder, uintptr_t address, char *result) { - int res = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(char), result); + int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(char), result); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read char from remote memory"); return -1; } return 0; @@ -293,6 +360,7 @@ read_py_str( unicode_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); goto err; } @@ -300,24 +368,28 @@ read_py_str( if (len < 0 || len > max_len) { PyErr_Format(PyExc_RuntimeError, "Invalid string length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object"); return NULL; } buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for string reading"); return NULL; } size_t offset = unwinder->debug_offsets.unicode_object.asciiobject_size; res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); goto err; } buf[len] = '\0'; result = PyUnicode_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyUnicode from remote string data"); goto err; } @@ -350,31 +422,36 @@ read_py_bytes( bytes_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); goto err; } Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); if (len < 0 || len > max_len) { PyErr_Format(PyExc_RuntimeError, - "Invalid string length (%zd) at 0x%lx", len, address); + "Invalid bytes length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); return NULL; } buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); return NULL; } size_t offset = unwinder->debug_offsets.bytes_object.ob_sval; res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); goto err; } buf[len] = '\0'; result = PyBytes_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); goto err; } @@ -405,6 +482,7 @@ read_py_long( unwinder->debug_offsets.long_object.size, long_obj); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); return -1; } @@ -423,6 +501,7 @@ read_py_long( digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); if (!digits) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); return -1; } memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); @@ -431,6 +510,7 @@ read_py_long( digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); if (!digits) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); return -1; } @@ -441,6 +521,7 @@ read_py_long( digits ); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); goto error; } } @@ -519,11 +600,15 @@ read_async_debug( ) { uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); if (!async_debug_addr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to get AsyncioDebug address"); return -1; } size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read AsyncioDebug offsets"); + } return result; } @@ -544,6 +629,7 @@ parse_task_name( unwinder->async_debug_offsets.asyncio_task_object.size, task_obj); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); return NULL; } @@ -558,6 +644,7 @@ parse_task_name( SIZEOF_PYOBJECT, task_name_obj); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name object"); return NULL; } @@ -569,13 +656,14 @@ parse_task_name( SIZEOF_TYPE_OBJ, type_obj); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name type object"); return NULL; } if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { long res = read_py_long(unwinder, task_name_addr); if (res == -1) { - chain_exceptions(PyExc_RuntimeError, "Failed to get task name"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed"); return NULL; } return PyUnicode_FromFormat("Task-%d", res); @@ -583,6 +671,7 @@ parse_task_name( if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name object is neither long nor unicode"); return NULL; } @@ -604,6 +693,7 @@ static int parse_task_awaited_by( if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, unwinder->async_debug_offsets.asyncio_task_object.size, task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object in awaited_by parsing"); return -1; } @@ -618,10 +708,12 @@ static int parse_task_awaited_by( if (awaited_by_is_a_set) { if (parse_tasks_in_set(unwinder, task_ab_addr, awaited_by, recurse_task)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse tasks in awaited_by set"); return -1; } } else { if (parse_task(unwinder, task_ab_addr, awaited_by, recurse_task)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse single awaited_by task"); return -1; } } @@ -644,6 +736,7 @@ handle_yield_from_frame( SIZEOF_INTERP_FRAME, iframe); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame in yield_from handler"); return -1; } @@ -651,6 +744,7 @@ handle_yield_from_frame( PyErr_SetString( PyExc_RuntimeError, "generator doesn't own its frame \\_o_/"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame ownership mismatch in yield_from"); return -1; } @@ -660,20 +754,22 @@ handle_yield_from_frame( if ((void*)stackpointer_addr != NULL) { uintptr_t gi_await_addr; err = read_py_ptr( - &unwinder->handle, + unwinder, stackpointer_addr - sizeof(void*), &gi_await_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await address"); return -1; } if ((void*)gi_await_addr != NULL) { uintptr_t gi_await_addr_type_addr; err = read_ptr( - &unwinder->handle, + unwinder, gi_await_addr + unwinder->debug_offsets.pyobject.ob_type, &gi_await_addr_type_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await type address"); return -1; } @@ -690,6 +786,7 @@ handle_yield_from_frame( */ err = parse_coro_chain(unwinder, gi_await_addr, render_to); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain in yield_from"); return -1; } } @@ -715,6 +812,7 @@ parse_coro_chain( SIZEOF_GEN_OBJ, gen_object); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read generator object in coro chain"); return -1; } @@ -726,11 +824,13 @@ parse_coro_chain( uintptr_t prev_frame; uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe; if (parse_frame_object(unwinder, &name, gi_iframe_addr, &prev_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); return -1; } if (PyList_Append(render_to, name)) { Py_DECREF(name); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); return -1; } Py_DECREF(name); @@ -756,15 +856,18 @@ create_task_result( result = PyList_New(0); if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task result list"); goto error; } call_stack = PyList_New(0); if (call_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); goto error; } if (PyList_Append(result, call_stack)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append call stack to task result"); goto error; } Py_CLEAR(call_stack); @@ -775,10 +878,12 @@ create_task_result( tn = PyLong_FromUnsignedLongLong(task_address); } if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); goto error; } if (PyList_Append(result, tn)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task name to result"); goto error; } Py_CLEAR(tn); @@ -787,6 +892,7 @@ create_task_result( if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, unwinder->async_debug_offsets.asyncio_task_object.size, task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object for coro chain"); goto error; } @@ -796,21 +902,25 @@ create_task_result( if ((void*)coro_addr != NULL) { call_stack = PyList_New(0); if (call_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coro call stack list"); goto error; } if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { Py_DECREF(call_stack); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); goto error; } if (PyList_Reverse(call_stack)) { Py_DECREF(call_stack); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); goto error; } if (PyList_SetItem(result, 0, call_stack) < 0) { Py_DECREF(call_stack); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to set call stack in result"); goto error; } } @@ -837,36 +947,42 @@ parse_task( int err; err = read_char( - &unwinder->handle, + unwinder, task_address + unwinder->async_debug_offsets.asyncio_task_object.task_is_task, &is_task); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read is_task flag"); goto error; } if (is_task) { result = create_task_result(unwinder, task_address, recurse_task); if (!result) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); goto error; } } else { result = PyList_New(0); if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty task result"); goto error; } } if (PyList_Append(render_to, result)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); goto error; } if (recurse_task) { awaited_by = PyList_New(0); if (awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list"); goto error; } if (PyList_Append(result, awaited_by)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by to result"); goto error; } Py_DECREF(awaited_by); @@ -877,6 +993,7 @@ parse_task( // it's borrowed from 'result' and will be decrefed when result is // deleted. awaited_by = NULL; + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task awaited_by relationships"); goto error; } } @@ -898,19 +1015,22 @@ process_set_entry( int recurse_task ) { uintptr_t key_addr; - if (read_py_ptr(&unwinder->handle, table_ptr, &key_addr)) { + if (read_py_ptr(unwinder, table_ptr, &key_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); return -1; } if ((void*)key_addr != NULL) { Py_ssize_t ref_cnt; - if (read_Py_ssize_t(&unwinder->handle, table_ptr, &ref_cnt)) { + if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry reference count"); return -1; } if (ref_cnt) { // if 'ref_cnt=0' it's a set dummy marker if (parse_task(unwinder, key_addr, awaited_by, recurse_task)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task in set entry"); return -1; } return 1; // Successfully processed a valid entry @@ -933,6 +1053,7 @@ parse_tasks_in_set( SIZEOF_SET_OBJ, set_object); if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); return -1; } @@ -946,6 +1067,7 @@ parse_tasks_in_set( int result = process_set_entry(unwinder, table_ptr, awaited_by, recurse_task); if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process set entry"); return -1; } if (result > 0) { @@ -960,10 +1082,11 @@ parse_tasks_in_set( static int -setup_async_result_structure(PyObject **result, PyObject **calls) +setup_async_result_structure(RemoteUnwinderObject *unwinder, PyObject **result, PyObject **calls) { *result = PyList_New(1); if (*result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create async result structure"); return -1; } @@ -971,6 +1094,7 @@ setup_async_result_structure(PyObject **result, PyObject **calls) if (*calls == NULL) { Py_DECREF(*result); *result = NULL; + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create calls list in async result"); return -1; } @@ -979,6 +1103,7 @@ setup_async_result_structure(PyObject **result, PyObject **calls) Py_DECREF(*result); *result = NULL; *calls = NULL; + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to set calls list in async result"); return -1; } @@ -987,34 +1112,39 @@ setup_async_result_structure(PyObject **result, PyObject **calls) static int add_task_info_to_result( - RemoteUnwinderObject *self, + RemoteUnwinderObject *unwinder, PyObject *result, uintptr_t running_task_addr ) { - PyObject *tn = parse_task_name(self, running_task_addr); + PyObject *tn = parse_task_name(unwinder, running_task_addr); if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name for result"); return -1; } if (PyList_Append(result, tn)) { Py_DECREF(tn); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task name to result"); return -1; } Py_DECREF(tn); PyObject* awaited_by = PyList_New(0); if (awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list for result"); return -1; } if (PyList_Append(result, awaited_by)) { Py_DECREF(awaited_by); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by to result"); return -1; } Py_DECREF(awaited_by); if (parse_task_awaited_by( - self, running_task_addr, awaited_by, 1) < 0) { + unwinder, running_task_addr, awaited_by, 1) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by for result"); return -1; } @@ -1034,21 +1164,25 @@ process_single_task_node( tn = parse_task_name(unwinder, task_addr); if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); goto error; } current_awaited_by = PyList_New(0); if (current_awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); goto error; } task_id = PyLong_FromUnsignedLongLong(task_addr); if (task_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); goto error; } result_item = PyTuple_New(3); if (result_item == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create result tuple in single task node"); goto error; } @@ -1063,6 +1197,7 @@ process_single_task_node( if (PyList_Append(result, result_item)) { Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); return -1; } Py_DECREF(result_item); @@ -1070,6 +1205,7 @@ process_single_task_node( // Get back current_awaited_by reference for parse_task_awaited_by current_awaited_by = PyTuple_GET_ITEM(result_item, 2); if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); return -1; } @@ -1121,20 +1257,22 @@ get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t c } static int -cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) { uintptr_t tlbc_array_ptr; void *tlbc_array = NULL; TLBCCacheEntry *entry = NULL; // Read the TLBC array pointer - if (read_ptr(&self->handle, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer"); return 0; // No TLBC array } // Read the TLBC array size Py_ssize_t tlbc_size; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size"); return 0; // Invalid size } @@ -1142,11 +1280,13 @@ cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc size_t array_data_size = tlbc_size * sizeof(void*); tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); if (!tlbc_array) { - return -1; // Memory error + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array"); + return 0; // Memory error } - if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data"); return 0; // Read error } @@ -1154,7 +1294,8 @@ cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); if (!entry) { PyMem_RawFree(tlbc_array); - return -1; // Memory error + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry"); + return 0; // Memory error } entry->tlbc_array = tlbc_array; @@ -1163,9 +1304,10 @@ cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc // Store in cache void *key = (void *)code_addr; - if (_Py_hashtable_set(self->tlbc_cache, key, entry) < 0) { + if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) { tlbc_cache_entry_destroy(entry); - return -1; // Cache error + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache"); + return 0; // Cache error } return 1; // Success @@ -1304,29 +1446,34 @@ parse_code_object(RemoteUnwinderObject *unwinder, if (_Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object"); goto error; } func = read_py_str(unwinder, GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); if (!func) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object"); goto error; } file = read_py_str(unwinder, GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); if (!file) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object"); goto error; } linetable = read_py_bytes(unwinder, GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); if (!linetable) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object"); goto error; } meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); if (!meta) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata"); goto error; } @@ -1338,6 +1485,7 @@ parse_code_object(RemoteUnwinderObject *unwinder, if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { cached_code_metadata_destroy(meta); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata"); goto error; } @@ -1363,9 +1511,11 @@ parse_code_object(RemoteUnwinderObject *unwinder, if (!tlbc_entry) { // Cache miss - try to read and cache TLBC array - if (cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation) > 0) { - tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array"); + goto error; } + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); } if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { @@ -1400,11 +1550,13 @@ parse_code_object(RemoteUnwinderObject *unwinder, lineno = PyLong_FromLong(info.lineno); if (!lineno) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); goto error; } tuple = PyTuple_New(3); if (!tuple) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create result tuple for code object"); goto error; } @@ -1441,7 +1593,7 @@ cleanup_stack_chunks(StackChunkList *chunks) static int process_single_stack_chunk( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, uintptr_t chunk_addr, StackChunkInfo *chunk_info ) { @@ -1451,11 +1603,13 @@ process_single_stack_chunk( char *this_chunk = PyMem_RawMalloc(current_size); if (!this_chunk) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer"); return -1; } - if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, current_size, this_chunk) < 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) { PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk"); return -1; } @@ -1465,11 +1619,13 @@ process_single_stack_chunk( this_chunk = PyMem_RawRealloc(this_chunk, actual_size); if (!this_chunk) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer"); return -1; } - if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, actual_size, this_chunk) < 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) { PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size"); return -1; } current_size = actual_size; @@ -1491,13 +1647,15 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder, size_t count = 0; size_t max_chunks = 16; - if (read_ptr(&unwinder->handle, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { + if (read_ptr(unwinder, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address"); return -1; } chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); if (!chunks) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array"); return -1; } @@ -1508,13 +1666,15 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder, StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); if (!new_chunks) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array"); goto error; } chunks = new_chunks; } // Process this chunk - if (process_single_stack_chunk(&unwinder->handle, chunk_addr, &chunks[count]) < 0) { + if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk"); goto error; } @@ -1559,6 +1719,7 @@ parse_frame_from_chunks( ) { void *frame_ptr = find_frame_in_chunks(chunks, address); if (!frame_ptr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks"); return -1; } @@ -1607,11 +1768,13 @@ populate_initial_state_data( sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); return -1; } @@ -1630,6 +1793,7 @@ populate_initial_state_data( address_of_thread, sizeof(void*), tstate) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); return -1; } @@ -1652,11 +1816,13 @@ find_running_frame( sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state for running frame"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL in running frame search"); return -1; } @@ -1668,16 +1834,18 @@ find_running_frame( sizeof(void*), &address_of_thread); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread address for running frame"); return -1; } // No Python frames are available for us (can happen at tear-down). if ((void*)address_of_thread != NULL) { int err = read_ptr( - &unwinder->handle, + unwinder, address_of_thread + unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); return -1; } return 0; @@ -1704,11 +1872,13 @@ find_running_task( sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state for running task"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL in running task search"); return -1; } @@ -1720,6 +1890,7 @@ find_running_task( sizeof(void*), &address_of_thread); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread head for running task"); return -1; } @@ -1730,11 +1901,12 @@ find_running_task( } bytes_read = read_py_ptr( - &unwinder->handle, + unwinder, address_of_thread + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, &address_of_running_loop); if (bytes_read == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); return -1; } @@ -1744,11 +1916,12 @@ find_running_task( } int err = read_ptr( - &unwinder->handle, + unwinder, address_of_thread + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, running_task_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); return -1; } @@ -1757,47 +1930,51 @@ find_running_task( static int find_running_task_and_coro( - RemoteUnwinderObject *self, + RemoteUnwinderObject *unwinder, uintptr_t *running_task_addr, uintptr_t *running_coro_addr, uintptr_t *running_task_code_obj ) { *running_task_addr = (uintptr_t)NULL; if (find_running_task( - self, running_task_addr) < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); + unwinder, running_task_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task search failed"); return -1; } if ((void*)*running_task_addr == NULL) { PyErr_SetString(PyExc_RuntimeError, "No running task found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task address is NULL"); return -1; } if (read_py_ptr( - &self->handle, - *running_task_addr + self->async_debug_offsets.asyncio_task_object.task_coro, + unwinder, + *running_task_addr + unwinder->async_debug_offsets.asyncio_task_object.task_coro, running_coro_addr) < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); return -1; } if ((void*)*running_coro_addr == NULL) { PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); return -1; } // note: genobject's gi_iframe is an embedded struct so the address to // the offset leads directly to its first field: f_executable if (read_py_ptr( - &self->handle, - *running_coro_addr + self->debug_offsets.gen_object.gi_iframe, + unwinder, + *running_coro_addr + unwinder->debug_offsets.gen_object.gi_iframe, running_task_code_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); return -1; } if ((void*)*running_task_code_obj == NULL) { PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); return -1; } @@ -1825,6 +2002,7 @@ parse_frame_object( frame ); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame"); return -1; } @@ -1870,6 +2048,7 @@ parse_async_frame_object( frame ); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read async frame"); return -1; } @@ -1884,6 +2063,7 @@ parse_async_frame_object( && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); return -1; } @@ -1908,6 +2088,7 @@ parse_async_frame_object( if (parse_code_object( unwinder, result, *code_object, instruction_pointer, previous_frame, tlbc_index)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse code object in async frame"); return -1; } @@ -1916,13 +2097,13 @@ parse_async_frame_object( static int parse_async_frame_chain( - RemoteUnwinderObject *self, + RemoteUnwinderObject *unwinder, PyObject *calls, uintptr_t running_task_code_obj ) { uintptr_t address_of_current_frame; - if (find_running_frame(self, self->runtime_start_address, &address_of_current_frame) < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); + if (find_running_frame(unwinder, unwinder->runtime_start_address, &address_of_current_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); return -1; } @@ -1930,7 +2111,7 @@ parse_async_frame_chain( while ((void*)address_of_current_frame != NULL) { PyObject* frame_info = NULL; int res = parse_async_frame_object( - self, + unwinder, &frame_info, address_of_current_frame, &address_of_current_frame, @@ -1938,7 +2119,7 @@ parse_async_frame_chain( ); if (res < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Async frame object parsing failed in chain"); return -1; } @@ -1948,6 +2129,7 @@ parse_async_frame_chain( if (PyList_Append(calls, frame_info) == -1) { Py_DECREF(frame_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame info to async chain"); return -1; } @@ -1975,6 +2157,7 @@ append_awaited_by_for_thread( if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, sizeof(task_node), task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task node head"); return -1; } @@ -1984,12 +2167,14 @@ append_awaited_by_for_thread( while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { if (++iteration_count > MAX_ITERATIONS) { PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task list iteration limit exceeded"); return -1; } if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { PyErr_SetString(PyExc_RuntimeError, "Invalid linked list structure reading remote memory"); + set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); return -1; } @@ -1997,6 +2182,7 @@ append_awaited_by_for_thread( - unwinder->async_debug_offsets.asyncio_task_object.task_node; if (process_single_task_node(unwinder, task_addr, result) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } @@ -2006,6 +2192,7 @@ append_awaited_by_for_thread( (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), sizeof(task_node), task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); return -1; } } @@ -2022,12 +2209,14 @@ append_awaited_by( { PyObject *tid_py = PyLong_FromUnsignedLong(tid); if (tid_py == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID object"); return -1; } PyObject *result_item = PyTuple_New(2); if (result_item == NULL) { Py_DECREF(tid_py); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by result tuple"); return -1; } @@ -2035,6 +2224,7 @@ append_awaited_by( if (awaited_by_for_thread == NULL) { Py_DECREF(tid_py); Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); return -1; } @@ -2042,12 +2232,14 @@ append_awaited_by( PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref if (PyList_Append(result, result_item)) { Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); return -1; } Py_DECREF(result_item); if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by for thread"); return -1; } @@ -2076,6 +2268,7 @@ process_frame_chain( if (++frame_count > MAX_FRAMES) { PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded"); return -1; } @@ -2083,6 +2276,7 @@ process_frame_chain( if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { PyErr_Clear(); if (parse_frame_object(unwinder, &frame, frame_addr, &next_frame_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); return -1; } } @@ -2096,11 +2290,13 @@ process_frame_chain( "Broken frame chain: expected frame at 0x%lx, got 0x%lx", prev_frame_addr, frame_addr); Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed"); return -1; } if (PyList_Append(frame_info, frame) == -1) { Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list"); return -1; } Py_DECREF(frame); @@ -2126,6 +2322,7 @@ unwind_stack_for_thread( int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, *current_tstate, unwinder->debug_offsets.thread_state.size, ts); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); goto error; } @@ -2133,14 +2330,17 @@ unwind_stack_for_thread( frame_info = PyList_New(0); if (!frame_info) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); goto error; } if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); goto error; } if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } @@ -2149,11 +2349,13 @@ unwind_stack_for_thread( thread_id = PyLong_FromLongLong( GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); if (thread_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); goto error; } result = PyTuple_New(2); if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create thread unwind result tuple"); goto error; } @@ -2186,6 +2388,7 @@ _remote_debugging.RemoteUnwinder.__init__ pid: int * all_threads: bool = False + debug: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -2193,6 +2396,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. pid: Process ID of the target Python process to debug all_threads: If True, initialize state for all threads in the process. If False, only initialize for the main thread. + debug: If True, chain exceptions to explain the sequence of events that + lead to the exception. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. @@ -2205,15 +2410,19 @@ process, including examining thread states, stack frames and other runtime data. static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, - int pid, int all_threads) -/*[clinic end generated code: output=b8027cb247092081 input=6a2056b04e6f050e]*/ + int pid, int all_threads, + int debug) +/*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/ { + self->debug = debug; if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); return -1; } self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); if (self->runtime_start_address == 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address"); return -1; } @@ -2221,6 +2430,13 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, &self->runtime_start_address, &self->debug_offsets) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets"); + return -1; + } + + // Validate that the debug offsets are valid + if(validate_debug_offsets(&self->debug_offsets) == -1) { + set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found"); return -1; } @@ -2235,6 +2451,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, if (populate_initial_state_data(all_threads, self, self->runtime_start_address, &self->interpreter_addr ,&self->tstate_addr) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to populate initial state data"); return -1; } @@ -2247,6 +2464,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, ); if (self->code_object_cache == NULL) { PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create code object cache"); return -1; } @@ -2263,6 +2481,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, if (self->tlbc_cache == NULL) { _Py_hashtable_destroy(self->code_object_cache); PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create TLBC cache"); return -1; } #endif @@ -2314,6 +2533,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self self->interpreter_addr, INTERP_STATE_BUFFER_SIZE, interp_state_buffer) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read interpreter state buffer"); goto exit; } @@ -2347,6 +2567,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self result = PyList_New(0); if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create stack trace result list"); goto exit; } @@ -2354,12 +2575,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate); if (!frame_info) { Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); goto exit; } if (PyList_Append(result, frame_info) == -1) { Py_DECREF(frame_info); Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to append thread frame info"); goto exit; } Py_DECREF(frame_info); @@ -2425,11 +2648,13 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s { if (!self->async_debug_offsets_available) { PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_all_awaited_by"); return NULL; } PyObject *result = PyList_New(0); if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create awaited_by result list"); goto result_err; } @@ -2442,6 +2667,7 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s sizeof(void*), &thread_state_addr)) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read main thread state in get_all_awaited_by"); goto result_err; } @@ -2454,6 +2680,7 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s sizeof(tid), &tid)) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read thread ID in get_all_awaited_by"); goto result_err; } @@ -2462,6 +2689,7 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s if (append_awaited_by(self, tid, head_addr, result)) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to append awaited_by for thread in get_all_awaited_by"); goto result_err; } @@ -2471,6 +2699,7 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s sizeof(void*), &thread_state_addr)) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read next thread state in get_all_awaited_by"); goto result_err; } } @@ -2485,6 +2714,7 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s // interesting for debugging. if (append_awaited_by(self, 0, head_addr, result)) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter awaited_by in get_all_awaited_by"); goto result_err; } @@ -2530,27 +2760,32 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject { if (!self->async_debug_offsets_available) { PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_async_stack_trace"); return NULL; } PyObject *result = NULL; PyObject *calls = NULL; - if (setup_async_result_structure(&result, &calls) < 0) { + if (setup_async_result_structure(self, &result, &calls) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to setup async result structure"); goto cleanup; } uintptr_t running_task_addr, running_coro_addr, running_task_code_obj; if (find_running_task_and_coro(self, &running_task_addr, &running_coro_addr, &running_task_code_obj) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to find running task and coro"); goto cleanup; } if (parse_async_frame_chain(self, calls, running_task_code_obj) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to parse async frame chain"); goto cleanup; } if (add_task_info_to_result(self, result, running_task_addr) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to add task info to result"); goto cleanup; } @@ -2687,5 +2922,4 @@ PyMODINIT_FUNC PyInit__remote_debugging(void) { return PyModuleDef_Init(&remote_debugging_module); -} - +} \ No newline at end of file diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h index e83e2fd7fd2b5b..5c313a2d66404a 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -10,7 +10,7 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, -"RemoteUnwinder(pid, *, all_threads=False)\n" +"RemoteUnwinder(pid, *, all_threads=False, debug=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -19,6 +19,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " pid: Process ID of the target Python process to debug\n" " all_threads: If True, initialize state for all threads in the process.\n" " If False, only initialize for the main thread.\n" +" debug: If True, chain exceptions to explain the sequence of events that\n" +" lead to the exception.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -30,7 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, static int _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, - int pid, int all_threads); + int pid, int all_threads, + int debug); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -38,7 +41,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -47,7 +50,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(debug), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -56,19 +59,20 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "debug", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; int pid; int all_threads = 0; + int debug = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -82,12 +86,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje if (!noptargs) { goto skip_optional_kwonly; } - all_threads = PyObject_IsTrue(fastargs[1]); - if (all_threads < 0) { + if (fastargs[1]) { + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + debug = PyObject_IsTrue(fastargs[2]); + if (debug < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, debug); exit: return return_value; @@ -240,4 +253,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=654772085f1f4bf6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=774ec34aa653402d input=a9049054013a1b77]*/ diff --git a/Python/remote_debug.h b/Python/remote_debug.h index 6566979bcd0e3e..6cbf1c8deaaed9 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -73,6 +73,18 @@ extern "C" { # define HAVE_PROCESS_VM_READV 0 #endif +#define _set_debug_exception_cause(exception, format, ...) \ + do { \ + if (!PyErr_ExceptionMatches(PyExc_PermissionError)) { \ + PyThreadState *tstate = _PyThreadState_GET(); \ + if (!_PyErr_Occurred(tstate)) { \ + _PyErr_Format(tstate, exception, format, ##__VA_ARGS__); \ + } else { \ + _PyErr_FormatFromCause(exception, format, ##__VA_ARGS__); \ + } \ + } \ + } while (0) + static inline size_t get_page_size(void) { size_t page_size = 0; @@ -137,12 +149,17 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX handle->task = pid_to_task(handle->pid); + if (handle->task == 0) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize macOS process handle"); + return -1; + } #elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, FALSE, pid); if (handle->hProcess == NULL) { PyErr_SetFromWindowsErr(0); + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize Windows process handle"); return -1; } #endif @@ -206,8 +223,10 @@ return_section_address64( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 64-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -227,9 +246,6 @@ return_section_address64( cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -270,8 +286,10 @@ return_section_address32( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 32-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -291,9 +309,6 @@ return_section_address32( cmd = (struct segment_command*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -311,8 +326,20 @@ return_section_address_fat( int is_abi64; size_t cpu_size = sizeof(cpu), abi64_size = sizeof(is_abi64); - sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0); - sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0); + if (sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU type via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } + if (sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU ABI capability via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } cpu |= is_abi64 * CPU_ARCH_ABI64; @@ -343,13 +370,18 @@ return_section_address_fat( return return_section_address64(section, proc_ref, base, (void*)hdr); default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "Unknown Mach-O magic number 0x%x in fat binary architecture %u at base 0x%lx", + hdr->magic, i, base); return 0; } } } - PyErr_SetString(PyExc_RuntimeError, "No matching architecture found in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "No matching architecture found for CPU type 0x%x " + "in fat binary at base 0x%lx (%u architectures examined)", + cpu, base, nfat_arch); return 0; } @@ -358,20 +390,26 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ { int fd = open(path, O_RDONLY); if (fd == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot open binary file '%s' for section '%s' search: %s", + path, secname, strerror(errno)); return 0; } struct stat fs; if (fstat(fd, &fs) == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot get file size for binary '%s' during section '%s' search: %s", + path, secname, strerror(errno)); close(fd); return 0; } void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { - PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot memory map binary file '%s' (size: %lld bytes) for section '%s' search: %s", + path, (long long)fs.st_size, secname, strerror(errno)); close(fd); return 0; } @@ -393,13 +431,22 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ result = return_section_address_fat(secname, proc_ref, base, map); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + PyErr_Format(PyExc_RuntimeError, + "Unrecognized Mach-O magic number 0x%x in binary file '%s' for section '%s' search", + magic, path, secname); break; } - munmap(map, fs.st_size); + if (munmap(map, fs.st_size) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to unmap binary file '%s' (size: %lld bytes): %s", + path, (long long)fs.st_size, strerror(errno)); + result = 0; + } if (close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close binary file '%s': %s", + path, strerror(errno)); result = 0; } return result; @@ -414,7 +461,10 @@ pid_to_task(pid_t pid) result = task_for_pid(mach_task_self(), pid, &task); if (result != KERN_SUCCESS) { - PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d (kern_return_t: %d). " + "This typically requires running as root or having the 'com.apple.system-task-ports' entitlement.", + pid, result); return 0; } return task; @@ -431,13 +481,15 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s mach_port_t proc_ref = pid_to_task(handle->pid); if (proc_ref == 0) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d during section search", + handle->pid); } return 0; } - int match_found = 0; char map_filename[MAXPATHLEN + 1]; + while (mach_vm_region( proc_ref, &address, @@ -447,6 +499,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s &count, &object_name) == KERN_SUCCESS) { + if ((region_info.protection & VM_PROT_READ) == 0 || (region_info.protection & VM_PROT_EXECUTE) == 0) { address += size; @@ -467,17 +520,17 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s filename = map_filename; // No path, use the whole string } - if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { - match_found = 1; - return search_section_in_file( + if (strncmp(filename, substr, strlen(substr)) == 0) { + uintptr_t result = search_section_in_file( secname, map_filename, address, size, proc_ref); + if (result != 0) { + return result; + } } address += size; } - PyErr_SetString(PyExc_RuntimeError, - "mach_vm_region failed to find the section"); return 0; } @@ -500,24 +553,38 @@ search_elf_file_for_section( int fd = open(elf_file, O_RDONLY); if (fd < 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open ELF file '%s' for section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } struct stat file_stats; if (fstat(fd, &file_stats) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot get file size for ELF file '%s' during section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (file_memory == MAP_FAILED) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot memory map ELF file '%s' (size: %lld bytes) for section '%s' search: %s", + elf_file, (long long)file_stats.st_size, secname, strerror(errno)); goto exit; } Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; + // Validate ELF header + if (elf_header->e_shstrndx >= elf_header->e_shnum) { + PyErr_Format(PyExc_RuntimeError, + "Invalid ELF file '%s': string table index %u >= section count %u", + elf_file, elf_header->e_shstrndx, elf_header->e_shnum); + goto exit; + } + Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); Elf_Shdr* shstrtab_section = §ion_header_table[elf_header->e_shstrndx]; @@ -534,6 +601,10 @@ search_elf_file_for_section( } } + if (section == NULL) { + goto exit; + } + Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); // Find the first PT_LOAD segment Elf_Phdr* first_load_segment = NULL; @@ -544,18 +615,25 @@ search_elf_file_for_section( } } - if (section != NULL && first_load_segment != NULL) { - uintptr_t elf_load_addr = first_load_segment->p_vaddr - - (first_load_segment->p_vaddr % first_load_segment->p_align); - result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + if (first_load_segment == NULL) { + PyErr_Format(PyExc_RuntimeError, + "No PT_LOAD segment found in ELF file '%s' (%u program headers examined)", + elf_file, elf_header->e_phnum); + goto exit; } + uintptr_t elf_load_addr = first_load_segment->p_vaddr + - (first_load_segment->p_vaddr % first_load_segment->p_align); + result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + exit: if (file_memory != NULL) { munmap(file_memory, file_stats.st_size); } if (fd >= 0 && close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close ELF file '%s': %s", + elf_file, strerror(errno)); result = 0; } return result; @@ -569,7 +647,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c FILE* maps_file = fopen(maps_file_path, "r"); if (maps_file == NULL) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open process memory map file '%s' for PID %d section search: %s", + maps_file_path, handle->pid, strerror(errno)); return 0; } @@ -578,11 +658,16 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c char *line = PyMem_Malloc(linesz); if (!line) { fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate memory for reading process map file '%s'", + maps_file_path); return 0; } uintptr_t retval = 0; + int lines_processed = 0; + int matches_found = 0; + while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { linelen = strlen(line); if (line[linelen - 1] != '\n') { @@ -593,7 +678,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c if (!biggerline) { PyMem_Free(line); fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot reallocate memory while reading process map file '%s' (attempted size: %zu)", + maps_file_path, linesz); return 0; } line = biggerline; @@ -604,6 +691,7 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c line[linelen - 1] = '\0'; // and prepare to read the next line into the start of the buffer. linelen = 0; + lines_processed++; unsigned long start = 0; unsigned long path_pos = 0; @@ -624,6 +712,7 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c } if (strstr(filename, substr)) { + matches_found++; retval = search_elf_file_for_section(handle, secname, start, path); if (retval) { break; @@ -633,7 +722,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c PyMem_Free(line); if (fclose(maps_file) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close process map file '%s': %s", + maps_file_path, strerror(errno)); retval = 0; } @@ -649,11 +740,20 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot open PE file for section '%s' analysis (error %lu)", + secname, error); return NULL; } + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); if (!hMap) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot create file mapping for PE file section '%s' analysis (error %lu)", + secname, error); CloseHandle(hFile); return NULL; } @@ -661,6 +761,10 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); if (!mapView) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot map view of PE file for section '%s' analysis (error %lu)", + secname, error); CloseHandle(hMap); CloseHandle(hFile); return NULL; @@ -668,7 +772,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid DOS signature (0x%x) in PE file for section '%s' analysis (expected 0x%x)", + pDOSHeader->e_magic, secname, IMAGE_DOS_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -677,7 +783,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid NT signature (0x%lx) in PE file for section '%s' analysis (expected 0x%lx)", + pNTHeaders->Signature, secname, IMAGE_NT_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -711,17 +819,27 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); if (hProcSnap == INVALID_HANDLE_VALUE) { - PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); + PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_PermissionError, + "Unable to create module snapshot for PID %d section '%s' " + "search (error %lu). Check permissions or PID validity", + handle->pid, secname, error); return 0; } MODULEENTRY32W moduleEntry; moduleEntry.dwSize = sizeof(moduleEntry); void* runtime_addr = NULL; + int modules_examined = 0; + int matches_found = 0; for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) { + modules_examined++; + // Look for either python executable or DLL if (wcsstr(moduleEntry.szModule, substr)) { + matches_found++; runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname); if (runtime_addr != NULL) { break; @@ -730,6 +848,7 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } CloseHandle(hProcSnap); + return (uintptr_t)runtime_addr; } @@ -747,7 +866,9 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Windows platform", + handle->pid); _PyErr_ChainExceptions1(exc); } #elif defined(__linux__) @@ -756,16 +877,28 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Linux platform", + handle->pid); _PyErr_ChainExceptions1(exc); } #elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "PyRuntime", "libpython"); - if (address == 0) { - // TODO: Differentiate between not found and error + const char* candidates[] = {"libpython", "python", "Python", NULL}; + for (const char** candidate = candidates; *candidate; candidate++) { PyErr_Clear(); - address = search_map_for_section(handle, "PyRuntime", "python"); + address = search_map_for_section(handle, "PyRuntime", *candidate); + if (address != 0) { + break; + } + } + if (address == 0) { + PyObject *exc = PyErr_GetRaisedException(); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d " + "on macOS platform (tried both libpython and python)", + handle->pid); + _PyErr_ChainExceptions1(exc); } #else Py_UNREACHABLE(); @@ -784,6 +917,11 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address do { if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + _set_debug_exception_cause(PyExc_OSError, + "ReadProcessMemory failed for PID %d at address 0x%lx " + "(size %zu, partial read %zu bytes): Windows error %lu", + handle->pid, remote_address + result, len - result, result, error); return -1; } result += read_bytes; @@ -804,6 +942,10 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); if (read_bytes < 0) { PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "process_vm_readv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); return -1; } @@ -822,13 +964,22 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address if (kr != KERN_SUCCESS) { switch (kr) { case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + PyErr_Format(PyExc_PermissionError, + "Memory protection failure reading from PID %d at address " + "0x%lx (size %zu): insufficient permissions", + handle->pid, remote_address, len); break; case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); + PyErr_Format(PyExc_ValueError, + "Invalid argument to mach_vm_read_overwrite for PID %d at " + "address 0x%lx (size %zu)", + handle->pid, remote_address, len); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_read_overwrite failed for PID %d at address 0x%lx " + "(size %zu): kern_return_t %d", + handle->pid, remote_address, len, kr); } return -1; } @@ -868,7 +1019,10 @@ _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, if (entry->data == NULL) { entry->data = PyMem_RawMalloc(page_size); if (entry->data == NULL) { - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate %zu bytes for page cache entry " + "during read from PID %d at address 0x%lx", + page_size, handle->pid, addr); return -1; } } @@ -900,13 +1054,16 @@ _Py_RemoteDebug_ReadDebugOffsets( *runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (!*runtime_start_address) { if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get PyRuntime address"); + PyErr_Format(PyExc_RuntimeError, + "Failed to locate PyRuntime address for PID %d", + handle->pid); } + _set_debug_exception_cause(PyExc_RuntimeError, "PyRuntime address lookup failed during debug offsets initialization"); return -1; } size_t size = sizeof(struct _Py_DebugOffsets); if (0 != _Py_RemoteDebug_ReadRemoteMemory(handle, *runtime_start_address, size, debug_offsets)) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read debug offsets structure from remote process"); return -1; } return 0; From ce0561fa591f0237d3c0157c597d3c6e36bf48df Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 26 May 2025 15:36:30 +0100 Subject: [PATCH 354/989] Add CODEOWNERS for remote debugging (#134720) --- .github/CODEOWNERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 88b957669826f7..b6bf1700befa7a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -331,3 +331,8 @@ Modules/_xxtestfuzz/ @ammaraskar **/*templateobject* @lysnikolaou **/*templatelib* @lysnikolaou **/*tstring* @lysnikolaou + +# Remote debugging +Python/remote_debug.h @pablogsal +Python/remote_debugging.c @pablogsal +Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 \ No newline at end of file From 7774869b1b5acc375a12293d2ccc39f606c064e6 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 26 May 2025 15:47:09 +0100 Subject: [PATCH 355/989] Add trailing line to CODEOWNERS (#134722) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b6bf1700befa7a..775d9c63260c83 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -335,4 +335,4 @@ Modules/_xxtestfuzz/ @ammaraskar # Remote debugging Python/remote_debug.h @pablogsal Python/remote_debugging.c @pablogsal -Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 \ No newline at end of file +Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 From f2ce4bbdfdfa2b658fbeef66f414be2ecf7981dd Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 26 May 2025 15:48:41 +0100 Subject: [PATCH 356/989] gh-132983: Convert dict_content to take Py_buffer in ``ZstdDict()`` (#133924) --- Modules/_zstd/clinic/zstddict.c.h | 43 +++++++++++++-- Modules/_zstd/compressor.c | 17 ++---- Modules/_zstd/decompressor.c | 16 ++---- Modules/_zstd/zstddict.c | 88 ++++++++++++++++--------------- Modules/_zstd/zstddict.h | 6 ++- 5 files changed, 97 insertions(+), 73 deletions(-) diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 810befdaf71f44..79db85405d6e6b 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -25,7 +25,7 @@ PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, "by multiple ZstdCompressor or ZstdDecompressor objects."); static PyObject * -_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, int is_raw); static PyObject * @@ -63,7 +63,7 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; - PyObject *dict_content; + Py_buffer dict_content = {NULL, NULL}; int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, @@ -71,7 +71,9 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (!fastargs) { goto exit; } - dict_content = fastargs[0]; + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } if (!noptargs) { goto skip_optional_kwonly; } @@ -80,12 +82,43 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto exit; } skip_optional_kwonly: - return_value = _zstd_ZstdDict_new_impl(type, dict_content, is_raw); + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + return return_value; } +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" @@ -189,4 +222,4 @@ _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=47b12b5848b53ed8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4696cbc722e5fdfc input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 31cb8c535c05a6..7f0558909b4422 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -173,11 +173,8 @@ _get_CDict(ZstdDict *self, int compressionLevel) } if (capsule == NULL) { /* Create ZSTD_CDict instance */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - cdict = ZSTD_createCDict(dict_buffer, - dict_len, + cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len, compressionLevel); Py_END_ALLOW_THREADS @@ -236,17 +233,13 @@ _zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary. It doesn't override compression context's parameters. */ - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); } else { Py_UNREACHABLE(); diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index d084f0847c72dd..015cb774ed2d76 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -68,10 +68,8 @@ _get_DDict(ZstdDict *self) if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - ret = ZSTD_createDDict(dict_buffer, dict_len); + ret = ZSTD_createDDict(self->dict_buffer, self->dict_len); Py_END_ALLOW_THREADS self->d_dict = ret; @@ -160,17 +158,13 @@ _zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary */ - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); } else { /* Impossible code path */ diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index e3e9e5d064515f..afc58b42e893d3 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -26,7 +26,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" /*[clinic input] @classmethod _zstd.ZstdDict.__new__ as _zstd_ZstdDict_new - dict_content: object + dict_content: Py_buffer The content of a Zstandard dictionary as a bytes-like object. / * @@ -42,17 +42,25 @@ by multiple ZstdCompressor or ZstdDecompressor objects. [clinic start generated code]*/ static PyObject * -_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, int is_raw) -/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/ +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; self->d_dict = NULL; + self->dict_buffer = NULL; self->dict_id = 0; self->lock = (PyMutex){0}; @@ -62,39 +70,26 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, goto error; } - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - goto error; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstandard dictionary content should at least " - "8 bytes."); + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); goto error; } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict( - PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); /* Check validity for ordinary dictionary */ if (!is_raw && self->dict_id == 0) { - char *msg = "Invalid Zstandard dictionary and is_raw not set.\n"; - PyErr_SetString(PyExc_ValueError, msg); + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); goto error; } - // Can only track self once self->dict_content is included PyObject_GC_Track(self); - return (PyObject*)self; + return (PyObject *)self; error: Py_XDECREF(self); @@ -115,12 +110,12 @@ ZstdDict_dealloc(PyObject *ob) assert(!PyMutex_IsLocked(&self->lock)); - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } @@ -131,25 +126,33 @@ PyDoc_STRVAR(ZstdDict_dictid_doc, "The special value '0' means a 'raw content' dictionary," "without any restrictions on format or content."); -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of a Zstandard dictionary, as a bytes object."); - static PyObject * -ZstdDict_str(PyObject *ob) +ZstdDict_repr(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("", - dict->dict_id, Py_SIZE(dict->dict_content)); + (unsigned int)dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { - {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, - ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), - Py_READONLY, ZstdDict_dictcontent_doc}, + {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, {NULL} }; +/*[clinic input] +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + /*[clinic input] @getter _zstd.ZstdDict.as_digested_dict @@ -219,6 +222,7 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF @@ -229,8 +233,7 @@ static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -238,7 +241,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); return 0; } @@ -246,7 +248,7 @@ static int ZstdDict_clear(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); return 0; } @@ -255,7 +257,7 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_str, ZstdDict_str}, + {Py_tp_repr, ZstdDict_repr}, {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h index dcba0f21852087..4a403416dbd4a3 100644 --- a/Modules/_zstd/zstddict.h +++ b/Modules/_zstd/zstddict.h @@ -15,8 +15,10 @@ typedef struct { ZSTD_DDict *d_dict; PyObject *c_dicts; - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + /* Dictionary id */ uint32_t dict_id; From 806107d7a2fa9baa76d4025f46fab2c8725963f4 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 26 May 2025 18:06:33 +0300 Subject: [PATCH 357/989] gh-134693: Fix `[-Wmaybe-uninitialized]` warning in `_remote_debugging_module.c` (#134694) --- Modules/_remote_debugging_module.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 6c824f9d8d22f4..86e269f31246e5 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2054,6 +2054,14 @@ parse_async_frame_object( *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + // Strip tag bits for consistent comparison + *code_object &= ~Py_TAG_BITS; + assert(code_object != NULL); + if ((void*)*code_object == NULL) { + return 0; + } + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { return 0; // C frame @@ -2067,15 +2075,6 @@ parse_async_frame_object( return -1; } - *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); - // Strip tag bits for consistent comparison - *code_object &= ~Py_TAG_BITS; - - assert(code_object != NULL); - if ((void*)*code_object == NULL) { - return 0; - } - uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); // Get tlbc_index for free threading builds From 7291eaba8b20b19e7971d43d88286d5820e5eb56 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 26 May 2025 08:35:04 -0700 Subject: [PATCH 358/989] gh-119180: Updates to PEP 649/749 docs (#134640) - Mention (again) that `type.__annotations__` is unsafe. It is now safe when using only classes defined under PEP 649 semantics, but not with classes defined using `from __future__ import annotations`. - Mention that annotations on instances no longer work. There was already an issue about this. - Mention the general changes in the "Porting to Python 3.14" section. - `annotationlib` was proposed by PEP-749, not PEP-649. Co-authored-by: Emma Smith Co-authored-by: Carol Willing --- Doc/reference/datamodel.rst | 14 +++++++++++++- Doc/whatsnew/3.14.rst | 11 ++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 005a768f684e2c..32a2e266262c52 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1228,10 +1228,22 @@ Special attributes :attr:`__annotations__ attributes `. For best practices on working with :attr:`~object.__annotations__`, - please see :mod:`annotationlib`. Where possible, use + please see :mod:`annotationlib`. Use :func:`annotationlib.get_annotations` instead of accessing this attribute directly. + .. warning:: + + Accessing the :attr:`!__annotations__` attribute directly + on a class object may return annotations for the wrong class, specifically + in certain cases where the class, its base class, or a metaclass + is defined under ``from __future__ import annotations``. + See :pep:`749 <749#pep749-metaclasses>` for details. + + This attribute does not exist on certain builtin classes. On + user-defined classes without ``__annotations__``, it is an + empty dictionary. + .. versionchanged:: 3.14 Annotations are now :ref:`lazily evaluated `. See :pep:`649`. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 88e52015bdc2b1..561d1a8914b50c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -74,7 +74,7 @@ deferred evaluation of annotations (:pep:`649`), and a new type of interpreter that uses tail calls. The library changes include the addition of a new :mod:`!annotationlib` module -for introspecting and wrapping annotations (:pep:`649`), +for introspecting and wrapping annotations (:pep:`749`), a new :mod:`!compression.zstd` module for Zstandard support (:pep:`784`), plus syntax highlighting in the REPL, as well as the usual deprecations and removals, @@ -444,6 +444,10 @@ In particular, do not read annotations directly from the namespace dictionary attribute of type objects. Use :func:`annotationlib.get_annotate_from_class_namespace` during class construction and :func:`annotationlib.get_annotations` afterwards. +In previous releases, it was sometimes possible to access class annotations from +an instance of an annotated class. This behavior was undocumented and accidental, +and will no longer work in Python 3.14. + ``from __future__ import annotations`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2501,6 +2505,11 @@ Changes in the Python API See :ref:`above ` for more details. (Contributed by Jelle Zijlstra in :gh:`105499`.) +* The runtime behavior of annotations has changed in various ways; see + :ref:`above ` for details. While most code that interacts + with annotations should continue to work, some undocumented details may behave + differently. + Build changes ============= From b8a885ce6318c5f1e916e19e2961434d096c5cef Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 26 May 2025 20:42:38 +0300 Subject: [PATCH 359/989] Add CODEOWNERS and YAML to end-of-file-fixer and trailing-whitespace (#134730) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- .github/workflows/tail-call.yml | 1 - .pre-commit-config.yaml | 8 +++++--- .readthedocs.yml | 1 - 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tail-call.yml b/.github/workflows/tail-call.yml index 4636372e26c41b..e32cbf0aaa3c3e 100644 --- a/.github/workflows/tail-call.yml +++ b/.github/workflows/tail-call.yml @@ -137,4 +137,3 @@ jobs: CC=clang-20 ./configure --with-tail-call-interp --disable-gil make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3632cf392039f5..822a8a9f4e5076 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,12 +43,14 @@ repos: exclude: ^Lib/test/test_tomllib/ - id: check-yaml - id: end-of-file-fixer - types: [python] + types_or: [python, yaml] exclude: Lib/test/tokenizedata/coding20731.py + - id: end-of-file-fixer + files: '^\.github/CODEOWNERS$' - id: trailing-whitespace - types_or: [c, inc, python, rst] + types_or: [c, inc, python, rst, yaml] - id: trailing-whitespace - files: '\.(gram)$' + files: '^\.github/CODEOWNERS|\.(gram)$' - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.33.0 diff --git a/.readthedocs.yml b/.readthedocs.yml index a57de00544e4e3..0a2c3f8345367f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -32,4 +32,3 @@ build: - make -C Doc venv html - mkdir _readthedocs - mv Doc/build/html _readthedocs/html - From c60f39ada625562bff26400f304690c19fe9f504 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Tue, 27 May 2025 02:46:49 +0900 Subject: [PATCH 360/989] gh-134557: Suppress immortalization in _PyCode_GetScriptXIData under free-threading (gh-134686) Disable immortalization around Py_CompileString*(). The same approach as 332356b that fixed the refleaks in compile() and eval(). E: 09e72cf can pass test_capi, test_sys and test__interpchannels with this patch for me. --- Python/crossinterp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 26eecdddf4bdd0..13d91c508c41fa 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -908,8 +908,15 @@ get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure, } goto error; } +#ifdef Py_GIL_DISABLED + // Don't immortalize code constants to avoid memory leaks. + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization++; +#endif code = Py_CompileStringExFlags( script, filename, Py_file_input, &cf, optimize); +#ifdef Py_GIL_DISABLED + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization--; +#endif Py_XDECREF(ref); if (code == NULL) { goto error; From 08c78e02fab4a1c9c075637422d621f9c740959a Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Mon, 26 May 2025 13:49:39 -0400 Subject: [PATCH 361/989] gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. (#134734) * Add t-string prefixes to _all_string_prefixes, and add a test to make sure we catch this error in the future. * Update lexical analysis docs for t-string prefixes. --- Doc/reference/lexical_analysis.rst | 3 +- Lib/test/test_tokenize.py | 56 ++++++++++++++++++++++++++++++ Lib/tokenize.py | 2 +- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index 6c4a4ea81afe29..b22eb4db7945d1 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -489,8 +489,9 @@ String literals are described by the following lexical definitions: .. productionlist:: python-grammar stringliteral: [`stringprefix`](`shortstring` | `longstring`) - stringprefix: "r" | "u" | "R" | "U" | "f" | "F" + stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T" : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" + : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT" shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""' shortstringitem: `shortstringchar` | `stringescapeseq` diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index e6b19fe1812d44..d4b51841891b28 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,6 +1,8 @@ import contextlib +import itertools import os import re +import string import tempfile import token import tokenize @@ -3238,5 +3240,59 @@ def test_exact_flag(self): self.check_output(source, expect, flag) +class StringPrefixTest(unittest.TestCase): + def test_prefixes(self): + # Get the list of defined string prefixes. I don't see an + # obvious documented way of doing this, but probably the best + # thing is to split apart tokenize.StringPrefix. + + # Make sure StringPrefix begins and ends in parens. + self.assertEqual(tokenize.StringPrefix[0], '(') + self.assertEqual(tokenize.StringPrefix[-1], ')') + + # Then split apart everything else by '|'. + defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|')) + + # Now compute the actual string prefixes, by exec-ing all + # valid prefix combinations, followed by an empty string. + + # Try all prefix lengths until we find a length that has zero + # valid prefixes. This will miss the case where for example + # there are no valid 3 character prefixes, but there are valid + # 4 character prefixes. That seems extremely unlikely. + + # Note that the empty prefix is being included, because length + # starts at 0. That's expected, since StringPrefix includes + # the empty prefix. + + valid_prefixes = set() + for length in itertools.count(): + num_at_this_length = 0 + for prefix in ( + "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length)) + ): + for t in itertools.permutations(prefix): + for u in itertools.product(*[(c, c.upper()) for c in t]): + p = ''.join(u) + if p == "not": + # 'not' can never be a string prefix, + # because it's a valid expression: not "" + continue + try: + eval(f'{p}""') + + # No syntax error, so p is a valid string + # prefix. + + valid_prefixes.add(p) + num_at_this_length += 1 + except SyntaxError: + pass + if num_at_this_length == 0: + break + + self.assertEqual(defined_prefixes, valid_prefixes) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 559a7aecbde2d1..7e71755068e1df 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -86,7 +86,7 @@ def _all_string_prefixes(): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permutations (include 'fr', but not # 'rf'). The various permutations will be generated. - _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr'] + _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr'] # if we add binary f-strings, add: ['fb', 'fbr'] result = {''} for prefix in _valid_string_prefixes: From 56743afe879ca8fc061a852c5c1a311ff590bba1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 26 May 2025 11:50:10 -0600 Subject: [PATCH 362/989] gh-132775: Unrevert "Use _PyCode GetScriptXIData()" (gh-134735) This reverts commit 8a793c4a365d06a7264887698ccd7d6ba6aba9f2, AKA gh-134599. This effectively re-applies commit 09e72cf (gh-134511) --- Lib/test/support/interpreters/channels.py | 2 +- Lib/test/test__interpreters.py | 25 +- Lib/test/test_interpreters/test_api.py | 23 +- Modules/_interpretersmodule.c | 294 ++++++---------------- 4 files changed, 104 insertions(+), 240 deletions(-) diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/interpreters/channels.py index b25a17b1aabb93..1724759b75a999 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/interpreters/channels.py @@ -69,7 +69,7 @@ def list_all(): if not hasattr(send, '_unboundop'): send._set_unbound(unboundop) else: - assert send._unbound[0] == op + assert send._unbound[0] == unboundop channels.append(chan) return channels diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index 63fdaad8de7ef5..ad3ebbfdff64a7 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -474,13 +474,15 @@ def setUp(self): def test_signatures(self): # See https://github.com/python/cpython/issues/126654 - msg = "expected 'shared' to be a dict" + msg = r'_interpreters.exec\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', 1) with self.assertRaisesRegex(TypeError, msg): _interpreters.exec(self.id, 'a', shared=1) + msg = r'_interpreters.run_string\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_string(self.id, 'a', shared=1) + msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_func(self.id, lambda: None, shared=1) @@ -952,7 +954,8 @@ def test_invalid_syntax(self): """) with self.subTest('script'): - self.assert_run_failed(SyntaxError, script) + with self.assertRaises(SyntaxError): + _interpreters.run_string(self.id, script) with self.subTest('module'): modname = 'spam_spam_spam' @@ -1019,12 +1022,19 @@ def script(): with open(w, 'w', encoding="utf-8") as spipe: with contextlib.redirect_stdout(spipe): print('it worked!', end='') + failed = None def f(): - _interpreters.set___main___attrs(self.id, dict(w=w)) - _interpreters.run_func(self.id, script) + nonlocal failed + try: + _interpreters.set___main___attrs(self.id, dict(w=w)) + _interpreters.run_func(self.id, script) + except Exception as exc: + failed = exc t = threading.Thread(target=f) t.start() t.join() + if failed: + raise Exception from failed with open(r, encoding="utf-8") as outfile: out = outfile.read() @@ -1053,19 +1063,16 @@ def test_closure(self): spam = True def script(): assert spam - - with self.assertRaises(TypeError): + with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) - # XXX This hasn't been fixed yet. - @unittest.expectedFailure def test_return_value(self): def script(): return 'spam' with self.assertRaises(ValueError): _interpreters.run_func(self.id, script) - @unittest.skip("we're not quite there yet") +# @unittest.skip("we're not quite there yet") def test_args(self): with self.subTest('args'): def script(a, b=0): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 1e2d572b1cbb81..165949167ceba8 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -839,9 +839,16 @@ def test_bad_script(self): interp.exec(10) def test_bytes_for_script(self): + r, w = self.pipe() + RAN = b'R' + DONE = b'D' interp = interpreters.create() - with self.assertRaises(TypeError): - interp.exec(b'print("spam")') + interp.exec(f"""if True: + import os + os.write({w}, {RAN!r}) + """) + os.write(w, DONE) + self.assertEqual(os.read(r, 1), RAN) def test_with_background_threads_still_running(self): r_interp, w_interp = self.pipe() @@ -1010,8 +1017,6 @@ def test_call(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1036,6 +1041,8 @@ def test_call(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): with self.assertRaises(Exception): @@ -1051,8 +1058,6 @@ def test_call_in_thread(self): for i, (callable, args, kwargs) in enumerate([ (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): @@ -1079,6 +1084,8 @@ def test_call_in_thread(self): (call_func_complex, ('custom', 'spam!'), {}), (call_func_complex, ('custom-inner', 'eggs!'), {}), (call_func_complex, ('???',), {'exc': ValueError('spam')}), + (call_func_return_shareable, (), {}), + (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): if args or kwargs: @@ -1618,8 +1625,8 @@ def test_exec(self): def test_call(self): with self.subTest('no args'): interpid = _interpreters.create() - exc = _interpreters.call(interpid, call_func_return_shareable) - self.assertIs(exc, None) + with self.assertRaises(ValueError): + _interpreters.call(interpid, call_func_return_shareable) with self.subTest('uncaught exception'): interpid = _interpreters.create() diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index f4807fd214b19e..376517ab92360f 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -9,7 +9,6 @@ #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() -#include "pycore_function.h" // _PyFunction_VerifyStateless() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -361,81 +360,6 @@ _get_current_xibufferview_type(void) } -/* Python code **************************************************************/ - -static const char * -check_code_str(PyUnicodeObject *text) -{ - assert(text != NULL); - if (PyUnicode_GET_LENGTH(text) == 0) { - return "too short"; - } - - // XXX Verify that it parses? - - return NULL; -} - -#ifndef NDEBUG -static int -code_has_args(PyCodeObject *code) -{ - assert(code != NULL); - return (code->co_argcount > 0 - || code->co_posonlyargcount > 0 - || code->co_kwonlyargcount > 0 - || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)); -} -#endif - -#define RUN_TEXT 1 -#define RUN_CODE 2 - -static const char * -get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) -{ - const char *codestr = NULL; - Py_ssize_t len = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - - if (PyUnicode_Check(arg)) { - assert(PyUnicode_Check(arg) - && (check_code_str((PyUnicodeObject *)arg) == NULL)); - codestr = PyUnicode_AsUTF8AndSize(arg, &len); - if (codestr == NULL) { - return NULL; - } - if (strlen(codestr) != (size_t)len) { - PyErr_SetString(PyExc_ValueError, - "source code string cannot contain null bytes"); - return NULL; - } - flags = RUN_TEXT; - } - else { - assert(PyCode_Check(arg)); - assert(_PyCode_VerifyStateless( - PyThreadState_Get(), (PyCodeObject *)arg, NULL, NULL, NULL) == 0); - assert(!code_has_args((PyCodeObject *)arg)); - flags = RUN_CODE; - - // Serialize the code object. - bytes_obj = PyMarshal_WriteObjectToString(arg, Py_MARSHAL_VERSION); - if (bytes_obj == NULL) { - return NULL; - } - codestr = PyBytes_AS_STRING(bytes_obj); - len = PyBytes_GET_SIZE(bytes_obj); - } - - *flags_p = flags; - *bytes_p = bytes_obj; - *len_p = len; - return codestr; -} - - /* interpreter-specific code ************************************************/ static int @@ -499,22 +423,14 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) static int -_run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) +_run_script(_PyXIData_t *script, PyObject *ns) { - PyObject *result = NULL; - if (flags & RUN_TEXT) { - result = PyRun_StringFlags(codestr, Py_file_input, ns, ns, NULL); - } - else if (flags & RUN_CODE) { - PyObject *code = PyMarshal_ReadObjectFromString(codestr, codestrlen); - if (code != NULL) { - result = PyEval_EvalCode(code, ns, ns); - Py_DECREF(code); - } - } - else { - Py_UNREACHABLE(); + PyObject *code = _PyXIData_NewObject(script); + if (code == NULL) { + return -1; } + PyObject *result = PyEval_EvalCode(code, ns, ns); + Py_DECREF(code); if (result == NULL) { return -1; } @@ -523,12 +439,11 @@ _run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) } static int -_run_in_interpreter(PyInterpreterState *interp, - const char *codestr, Py_ssize_t codestrlen, - PyObject *shareables, int flags, +_exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, + _PyXIData_t *script, PyObject *shareables, PyObject **p_excinfo) { - assert(!PyErr_Occurred()); + assert(!_PyErr_Occurred(tstate)); _PyXI_session *session = _PyXI_NewSession(); if (session == NULL) { return -1; @@ -536,7 +451,7 @@ _run_in_interpreter(PyInterpreterState *interp, // Prep and switch interpreters. if (_PyXI_Enter(session, interp, shareables) < 0) { - if (PyErr_Occurred()) { + if (_PyErr_Occurred(tstate)) { // If an error occured at this step, it means that interp // was not prepared and switched. _PyXI_FreeSession(session); @@ -558,7 +473,7 @@ _run_in_interpreter(PyInterpreterState *interp, if (mainns == NULL) { goto finally; } - res = _run_script(mainns, codestr, codestrlen, flags); + res = _run_script(script, mainns); finally: // Clean up and switch back. @@ -952,104 +867,23 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static PyUnicodeObject * -convert_script_arg(PyThreadState *tstate, - PyObject *arg, const char *fname, const char *displayname, - const char *expected) -{ - PyUnicodeObject *str = NULL; - if (PyUnicode_CheckExact(arg)) { - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else if (PyUnicode_Check(arg)) { - // XXX str = PyUnicode_FromObject(arg); - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_str(str); - if (err != NULL) { - Py_DECREF(str); - _PyErr_Format(tstate, PyExc_ValueError, - "%.200s(): bad script text (%s)", fname, err); - return NULL; - } - - return str; -} - -static PyCodeObject * -convert_code_arg(PyThreadState *tstate, - PyObject *arg, const char *fname, const char *displayname, - const char *expected) +static void +unwrap_not_shareable(PyThreadState *tstate) { - PyObject *cause; - PyCodeObject *code = NULL; - if (PyFunction_Check(arg)) { - // For now we allow globals, so we can't use - // _PyFunction_VerifyStateless(). - PyObject *codeobj = PyFunction_GetCode(arg); - if (_PyCode_VerifyStateless( - tstate, (PyCodeObject *)codeobj, NULL, NULL, NULL) < 0) { - goto chained; - } - code = (PyCodeObject *)Py_NewRef(codeobj); + PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); + if (!_PyErr_ExceptionMatches(tstate, exctype)) { + return; } - else if (PyCode_Check(arg)) { - if (_PyCode_VerifyStateless( - tstate, (PyCodeObject *)arg, NULL, NULL, NULL) < 0) { - goto chained; - } - code = (PyCodeObject *)Py_NewRef(arg); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; } else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; + assert(PyException_GetContext(exc) == NULL); } - - return code; - -chained: - cause = _PyErr_GetRaisedException(tstate); - assert(cause != NULL); - _PyArg_BadArgument(fname, displayname, expected, arg); - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyException_SetCause(exc, cause); _PyErr_SetRaisedException(tstate, exc); - return NULL; -} - -static int -_interp_exec(PyObject *self, PyInterpreterState *interp, - PyObject *code_arg, PyObject *shared_arg, PyObject **p_excinfo) -{ - if (shared_arg != NULL && !PyDict_CheckExact(shared_arg)) { - PyErr_SetString(PyExc_TypeError, "expected 'shared' to be a dict"); - return -1; - } - - // Extract code. - Py_ssize_t codestrlen = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - const char *codestr = get_code_str(code_arg, - &codestrlen, &bytes_obj, &flags); - if (codestr == NULL) { - return -1; - } - - // Run the code in the interpreter. - int res = _run_in_interpreter(interp, codestr, codestrlen, - shared_arg, flags, p_excinfo); - Py_XDECREF(bytes_obj); - if (res < 0) { - return -1; - } - - return 0; } static PyObject * @@ -1062,8 +896,9 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" FUNCNAME, kwlist, - &id, &code, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &code, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1075,22 +910,17 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - const char *expected = "a string, a function, or a code object"; - if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(tstate, code, FUNCNAME, - "argument 2", expected); - } - else { - code = (PyObject *)convert_code_arg(tstate, code, FUNCNAME, - "argument 2", expected); - } - if (code == NULL) { + // We don't need the script to be "pure", which means it can use + // global variables. They will be resolved against __main__. + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, shared, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1126,8 +956,9 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O$p:" FUNCNAME, kwlist, - &id, &script, &shared, &restricted)) + "OU|O!$p:" FUNCNAME, kwlist, + &id, &script, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1139,15 +970,20 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - script = (PyObject *)convert_script_arg(tstate, script, FUNCNAME, - "argument 2", "a string"); - if (script == NULL) { + if (PyFunction_Check(script) || PyCode_Check(script)) { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a string", script); + return NULL; + } + + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, script, shared, &excinfo); - Py_DECREF(script); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1173,8 +1009,9 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" FUNCNAME, kwlist, - &id, &func, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &func, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1186,16 +1023,29 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyCodeObject *code = convert_code_arg(tstate, func, FUNCNAME, - "argument 2", - "a function or a code object"); - if (code == NULL) { + // We don't worry about checking globals. They will be resolved + // against __main__. + PyObject *code; + if (PyFunction_Check(func)) { + code = PyFunction_GET_CODE(func); + } + else if (PyCode_Check(func)) { + code = func; + } + else { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a function", func); + return NULL; + } + + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, (PyObject *)code, shared, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; @@ -1248,15 +1098,15 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *code = (PyObject *)convert_code_arg(tstate, callable, FUNCNAME, - "argument 2", "a function"); - if (code == NULL) { + _PyXIData_t xidata = {0}; + if (_PyCode_GetPureScriptXIData(tstate, callable, &xidata) < 0) { + unwrap_not_shareable(tstate); return NULL; } PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, NULL, &excinfo); - Py_DECREF(code); + int res = _exec_in_interpreter(tstate, interp, &xidata, NULL, &excinfo); + _PyXIData_Release(&xidata); if (res < 0) { assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); return excinfo; From 3c0525126ef95efe2f578e93db09f3282e3ca08f Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 26 May 2025 23:56:40 +0530 Subject: [PATCH 363/989] gh-134637: Fix performance regression in calling `ctypes` function pointer in `free threading`. (#134702) Fix performance regression in calling `ctypes` function pointer in `free threading`. --- ...-05-26-17-06-40.gh-issue-134637.9-3zRL.rst | 1 + Modules/_ctypes/_ctypes.c | 142 +++++++++++------- 2 files changed, 91 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-26-17-06-40.gh-issue-134637.9-3zRL.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-26-17-06-40.gh-issue-134637.9-3zRL.rst b/Misc/NEWS.d/next/Library/2025-05-26-17-06-40.gh-issue-134637.9-3zRL.rst new file mode 100644 index 00000000000000..2a4d8725210834 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-26-17-06-40.gh-issue-134637.9-3zRL.rst @@ -0,0 +1 @@ +Fix performance regression in calling a :mod:`ctypes` function pointer in :term:`free threading`. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 7de6bb396b084e..7e8a133caa72ac 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3610,6 +3610,45 @@ generic_pycdata_new(ctypes_state *st, PyCFuncPtr_Type */ +static inline void +atomic_xsetref(PyObject **field, PyObject *value) +{ +#ifdef Py_GIL_DISABLED + PyObject *old = *field; + _Py_atomic_store_ptr(field, value); + Py_XDECREF(old); +#else + Py_XSETREF(*field, value); +#endif +} +/* + This function atomically loads the reference from *field, and + tries to get a new reference to it. If the incref fails, + it acquires critical section of obj and returns a new reference to the *field. + In the general case, this avoids contention on acquiring the critical section. +*/ +static inline PyObject * +atomic_xgetref(PyObject *obj, PyObject **field) +{ +#ifdef Py_GIL_DISABLED + PyObject *value = _Py_atomic_load_ptr(field); + if (value == NULL) { + return NULL; + } + if (_Py_TryIncrefCompare(field, value)) { + return value; + } + Py_BEGIN_CRITICAL_SECTION(obj); + value = Py_XNewRef(*field); + Py_END_CRITICAL_SECTION(); + return value; +#else + return Py_XNewRef(*field); +#endif +} + + + /*[clinic input] @critical_section @setter @@ -3626,7 +3665,7 @@ _ctypes_CFuncPtr_errcheck_set_impl(PyCFuncPtrObject *self, PyObject *value) return -1; } Py_XINCREF(value); - Py_XSETREF(self->errcheck, value); + atomic_xsetref(&self->errcheck, value); return 0; } @@ -3658,12 +3697,10 @@ static int _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) /*[clinic end generated code: output=0be0a086abbabf18 input=683c3bef4562ccc6]*/ { - PyObject *checker, *oldchecker; + PyObject *checker; if (value == NULL) { - oldchecker = self->checker; - self->checker = NULL; - Py_CLEAR(self->restype); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->restype, NULL); + atomic_xsetref(&self->checker, NULL); return 0; } ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); @@ -3679,11 +3716,9 @@ _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) if (PyObject_GetOptionalAttr(value, &_Py_ID(_check_retval_), &checker) < 0) { return -1; } - oldchecker = self->checker; - self->checker = checker; Py_INCREF(value); - Py_XSETREF(self->restype, value); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->checker, checker); + atomic_xsetref(&self->restype, value); return 0; } @@ -3728,16 +3763,16 @@ _ctypes_CFuncPtr_argtypes_set_impl(PyCFuncPtrObject *self, PyObject *value) PyObject *converters; if (value == NULL || value == Py_None) { - Py_CLEAR(self->converters); - Py_CLEAR(self->argtypes); + atomic_xsetref(&self->argtypes, NULL); + atomic_xsetref(&self->converters, NULL); } else { ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); converters = converters_from_argtypes(st, value); if (!converters) return -1; - Py_XSETREF(self->converters, converters); + atomic_xsetref(&self->converters, converters); Py_INCREF(value); - Py_XSETREF(self->argtypes, value); + atomic_xsetref(&self->argtypes, value); } return 0; } @@ -4533,16 +4568,11 @@ _build_result(PyObject *result, PyObject *callargs, } static PyObject * -PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) +PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) { - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); - PyObject *restype; - PyObject *converters; - PyObject *checker; - PyObject *argtypes; - PyObject *result; - PyObject *callargs; - PyObject *errcheck; + PyObject *result = NULL; + PyObject *callargs = NULL; + PyObject *ret = NULL; #ifdef MS_WIN32 IUnknown *piunk = NULL; #endif @@ -4560,13 +4590,24 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) } assert(info); /* Cannot be NULL for PyCFuncPtrObject instances */ - restype = self->restype ? self->restype : info->restype; - converters = self->converters ? self->converters : info->converters; - checker = self->checker ? self->checker : info->checker; - argtypes = self->argtypes ? self->argtypes : info->argtypes; -/* later, we probably want to have an errcheck field in stginfo */ - errcheck = self->errcheck /* ? self->errcheck : info->errcheck */; - + PyObject *restype = atomic_xgetref(op, &self->restype); + if (restype == NULL) { + restype = Py_XNewRef(info->restype); + } + PyObject *converters = atomic_xgetref(op, &self->converters); + if (converters == NULL) { + converters = Py_XNewRef(info->converters); + } + PyObject *checker = atomic_xgetref(op, &self->checker); + if (checker == NULL) { + checker = Py_XNewRef(info->checker); + } + PyObject *argtypes = atomic_xgetref(op, &self->argtypes); + if (argtypes == NULL) { + argtypes = Py_XNewRef(info->argtypes); + } + /* later, we probably want to have an errcheck field in stginfo */ + PyObject *errcheck = atomic_xgetref(op, &self->errcheck); pProc = *(void **)self->b_ptr; #ifdef MS_WIN32 @@ -4577,25 +4618,25 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (!this) { PyErr_SetString(PyExc_ValueError, "native com method call without 'this' parameter"); - return NULL; + goto finally; } if (!CDataObject_Check(st, this)) { PyErr_SetString(PyExc_TypeError, "Expected a COM this pointer as first argument"); - return NULL; + goto finally; } /* there should be more checks? No, in Python */ /* First arg is a pointer to an interface instance */ if (!this->b_ptr || *(void **)this->b_ptr == NULL) { PyErr_SetString(PyExc_ValueError, "NULL COM pointer access"); - return NULL; + goto finally; } piunk = *(IUnknown **)this->b_ptr; if (NULL == piunk->lpVtbl) { PyErr_SetString(PyExc_ValueError, "COM method call without VTable"); - return NULL; + goto finally; } pProc = ((void **)piunk->lpVtbl)[self->index - 0x1000]; } @@ -4603,8 +4644,9 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) callargs = _build_callargs(st, self, argtypes, inargs, kwds, &outmask, &inoutmask, &numretvals); - if (callargs == NULL) - return NULL; + if (callargs == NULL) { + goto finally; + } if (converters) { int required = Py_SAFE_DOWNCAST(PyTuple_GET_SIZE(converters), @@ -4623,7 +4665,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } else if (required != actual) { Py_DECREF(callargs); @@ -4632,7 +4674,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } @@ -4663,23 +4705,19 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (v == NULL || v != callargs) { Py_DECREF(result); Py_DECREF(callargs); - return v; + ret = v; + goto finally; } Py_DECREF(v); } - - return _build_result(result, callargs, - outmask, inoutmask, numretvals); -} - -static PyObject * -PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) -{ - PyObject *result; - Py_BEGIN_CRITICAL_SECTION(op); - result = PyCFuncPtr_call_lock_held(op, inargs, kwds); - Py_END_CRITICAL_SECTION(); - return result; + ret = _build_result(result, callargs, outmask, inoutmask, numretvals); +finally: + Py_XDECREF(restype); + Py_XDECREF(converters); + Py_XDECREF(checker); + Py_XDECREF(argtypes); + Py_XDECREF(errcheck); + return ret; } static int From 96905bdd273d2e5724d2c1b6b0f95ecb0daeaabe Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Tue, 27 May 2025 06:43:35 +0900 Subject: [PATCH 364/989] gh-134160: Use multi-phase init in documentation examples (#134296) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/c-api/allocation.rst | 2 +- Doc/c-api/intro.rst | 4 +- Doc/extending/building.rst | 6 +- Doc/extending/embedding.rst | 12 ++- Doc/extending/extending.rst | 150 +++++++++++++--------------- Doc/extending/index.rst | 4 + Doc/extending/newtypes_tutorial.rst | 37 ++++--- Doc/includes/newtypes/custom.c | 41 +++++--- Doc/includes/newtypes/custom2.c | 42 ++++---- Doc/includes/newtypes/custom3.c | 40 +++++--- Doc/includes/newtypes/custom4.c | 40 +++++--- Doc/includes/newtypes/sublist.c | 46 +++++---- 12 files changed, 228 insertions(+), 196 deletions(-) diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index f8d01a3f29b30e..59d913a0462382 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -153,6 +153,6 @@ Allocating Objects on the Heap .. seealso:: - :c:func:`PyModule_Create` + :ref:`moduleobjects` To allocate and create extension modules. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 0c20ad17194eb6..112b141032f55a 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -127,7 +127,7 @@ complete listing. item defined in the module file. Example:: static struct PyModuleDef spam_module = { - PyModuleDef_HEAD_INIT, + .m_base = PyModuleDef_HEAD_INIT, .m_name = "spam", ... }; @@ -135,7 +135,7 @@ complete listing. PyMODINIT_FUNC PyInit_spam(void) { - return PyModule_Create(&spam_module); + return PyModuleDef_Init(&spam_module); } diff --git a/Doc/extending/building.rst b/Doc/extending/building.rst index ddde567f6f3efa..a58eb40d431c59 100644 --- a/Doc/extending/building.rst +++ b/Doc/extending/building.rst @@ -23,10 +23,10 @@ instance. See :ref:`initializing-modules` for details. .. highlight:: python For modules with ASCII-only names, the function must be named -``PyInit_``, with ```` replaced by the name of the -module. When using :ref:`multi-phase-initialization`, non-ASCII module names +:samp:`PyInit_{}`, with ```` replaced by the name of the module. +When using :ref:`multi-phase-initialization`, non-ASCII module names are allowed. In this case, the initialization function name is -``PyInitU_``, with ```` encoded using Python's +:samp:`PyInitU_{}`, with ```` encoded using Python's *punycode* encoding with hyphens replaced by underscores. In Python:: def initfunc_name(name): diff --git a/Doc/extending/embedding.rst b/Doc/extending/embedding.rst index b777862da79f14..cb41889437c8b0 100644 --- a/Doc/extending/embedding.rst +++ b/Doc/extending/embedding.rst @@ -245,21 +245,23 @@ Python extension. For example:: return PyLong_FromLong(numargs); } - static PyMethodDef EmbMethods[] = { + static PyMethodDef emb_module_methods[] = { {"numargs", emb_numargs, METH_VARARGS, "Return the number of arguments received by the process."}, {NULL, NULL, 0, NULL} }; - static PyModuleDef EmbModule = { - PyModuleDef_HEAD_INIT, "emb", NULL, -1, EmbMethods, - NULL, NULL, NULL, NULL + static struct PyModuleDef emb_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "emb", + .m_size = 0, + .m_methods = emb_module_methods, }; static PyObject* PyInit_emb(void) { - return PyModule_Create(&EmbModule); + return PyModuleDef_Init(&emb_module); } Insert the above code just above the :c:func:`main` function. Also, insert the diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index b0493bed75b151..918c751b009761 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -203,31 +203,42 @@ function usually raises :c:data:`PyExc_TypeError`. If you have an argument whos value must be in a particular range or must satisfy other conditions, :c:data:`PyExc_ValueError` is appropriate. -You can also define a new exception that is unique to your module. For this, you -usually declare a static object variable at the beginning of your file:: +You can also define a new exception that is unique to your module. +For this, you can declare a static global object variable at the beginning +of the file:: static PyObject *SpamError; -and initialize it in your module's initialization function (:c:func:`!PyInit_spam`) -with an exception object:: +and initialize it with an exception object in the module's +:c:data:`Py_mod_exec` function (:c:func:`!spam_module_exec`):: - PyMODINIT_FUNC - PyInit_spam(void) + static int + spam_module_exec(PyObject *m) { - PyObject *m; - - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; - SpamError = PyErr_NewException("spam.error", NULL, NULL); - if (PyModule_AddObjectRef(m, "error", SpamError) < 0) { - Py_CLEAR(SpamError); - Py_DECREF(m); - return NULL; + if (PyModule_AddObjectRef(m, "SpamError", SpamError) < 0) { + return -1; } - return m; + return 0; + } + + static PyModuleDef_Slot spam_module_slots[] = { + {Py_mod_exec, spam_module_exec}, + {0, NULL} + }; + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + .m_size = 0, // non-negative + .m_slots = spam_module_slots, + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); } Note that the Python name for the exception object is :exc:`!spam.error`. The @@ -318,7 +329,7 @@ The Module's Method Table and Initialization Function I promised to show how :c:func:`!spam_system` is called from Python programs. First, we need to list its name and address in a "method table":: - static PyMethodDef SpamMethods[] = { + static PyMethodDef spam_methods[] = { ... {"system", spam_system, METH_VARARGS, "Execute a shell command."}, @@ -343,13 +354,10 @@ function. The method table must be referenced in the module definition structure:: - static struct PyModuleDef spammodule = { - PyModuleDef_HEAD_INIT, - "spam", /* name of module */ - spam_doc, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - SpamMethods + static struct PyModuleDef spam_module = { + ... + .m_methods = spam_methods, + ... }; This structure, in turn, must be passed to the interpreter in the module's @@ -360,23 +368,17 @@ only non-\ ``static`` item defined in the module file:: PyMODINIT_FUNC PyInit_spam(void) { - return PyModule_Create(&spammodule); + return PyModuleDef_Init(&spam_module); } Note that :c:macro:`PyMODINIT_FUNC` declares the function as ``PyObject *`` return type, declares any special linkage declarations required by the platform, and for C++ declares the function as ``extern "C"``. -When the Python program imports module :mod:`!spam` for the first time, -:c:func:`!PyInit_spam` is called. (See below for comments about embedding Python.) -It calls :c:func:`PyModule_Create`, which returns a module object, and -inserts built-in function objects into the newly created module based upon the -table (an array of :c:type:`PyMethodDef` structures) found in the module definition. -:c:func:`PyModule_Create` returns a pointer to the module object -that it creates. It may abort with a fatal error for -certain errors, or return ``NULL`` if the module could not be initialized -satisfactorily. The init function must return the module object to its caller, -so that it then gets inserted into ``sys.modules``. +:c:func:`!PyInit_spam` is called when each interpreter imports its module +:mod:`!spam` for the first time. (See below for comments about embedding Python.) +A pointer to the module definition must be returned via :c:func:`PyModuleDef_Init`, +so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. @@ -433,23 +435,19 @@ optionally followed by an import of the module:: .. note:: - Removing entries from ``sys.modules`` or importing compiled modules into - multiple interpreters within a process (or following a :c:func:`fork` without an - intervening :c:func:`exec`) can create problems for some extension modules. - Extension module authors should exercise caution when initializing internal data - structures. + If you declare a global variable or a local static one, the module may + experience unintended side-effects on re-initialisation, for example when + removing entries from ``sys.modules`` or importing compiled modules into + multiple interpreters within a process + (or following a :c:func:`fork` without an intervening :c:func:`exec`). + If module state is not yet fully :ref:`isolated `, + authors should consider marking the module as having no support for subinterpreters + (via :c:macro:`Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED`). A more substantial example module is included in the Python source distribution -as :file:`Modules/xxmodule.c`. This file may be used as a template or simply +as :file:`Modules/xxlimited.c`. This file may be used as a template or simply read as an example. -.. note:: - - Unlike our ``spam`` example, ``xxmodule`` uses *multi-phase initialization* - (new in Python 3.5), where a PyModuleDef structure is returned from - ``PyInit_spam``, and creation of the module is left to the import machinery. - For details on multi-phase initialization, see :PEP:`489`. - .. _compilation: @@ -790,18 +788,17 @@ Philbrick (philbrick@hks.com):: {NULL, NULL, 0, NULL} /* sentinel */ }; - static struct PyModuleDef keywdargmodule = { - PyModuleDef_HEAD_INIT, - "keywdarg", - NULL, - -1, - keywdarg_methods + static struct PyModuleDef keywdarg_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "keywdarg", + .m_size = 0, + .m_methods = keywdarg_methods, }; PyMODINIT_FUNC PyInit_keywdarg(void) { - return PyModule_Create(&keywdargmodule); + return PyModuleDef_Init(&keywdarg_module); } @@ -1072,8 +1069,9 @@ why his :meth:`!__del__` methods would fail... The second case of problems with a borrowed reference is a variant involving threads. Normally, multiple threads in the Python interpreter can't get in each -other's way, because there is a global lock protecting Python's entire object -space. However, it is possible to temporarily release this lock using the macro +other's way, because there is a :term:`global lock ` +protecting Python's entire object space. +However, it is possible to temporarily release this lock using the macro :c:macro:`Py_BEGIN_ALLOW_THREADS`, and to re-acquire it using :c:macro:`Py_END_ALLOW_THREADS`. This is common around blocking I/O calls, to let other threads use the processor while waiting for the I/O to complete. @@ -1259,20 +1257,15 @@ two more lines must be added:: #include "spammodule.h" The ``#define`` is used to tell the header file that it is being included in the -exporting module, not a client module. Finally, the module's initialization -function must take care of initializing the C API pointer array:: +exporting module, not a client module. Finally, the module's :c:data:`mod_exec +` function must take care of initializing the C API pointer array:: - PyMODINIT_FUNC - PyInit_spam(void) + static int + spam_module_exec(PyObject *m) { - PyObject *m; static void *PySpam_API[PySpam_API_pointers]; PyObject *c_api_object; - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; - /* Initialize the C API pointer array */ PySpam_API[PySpam_System_NUM] = (void *)PySpam_System; @@ -1280,11 +1273,10 @@ function must take care of initializing the C API pointer array:: c_api_object = PyCapsule_New((void *)PySpam_API, "spam._C_API", NULL); if (PyModule_Add(m, "_C_API", c_api_object) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Note that ``PySpam_API`` is declared ``static``; otherwise the pointer @@ -1343,20 +1335,16 @@ like this:: All that a client module must do in order to have access to the function :c:func:`!PySpam_System` is to call the function (or rather macro) -:c:func:`!import_spam` in its initialization function:: +:c:func:`!import_spam` in its :c:data:`mod_exec ` function:: - PyMODINIT_FUNC - PyInit_client(void) + static int + client_module_exec(PyObject *m) { - PyObject *m; - - m = PyModule_Create(&clientmodule); - if (m == NULL) - return NULL; - if (import_spam() < 0) - return NULL; + if (import_spam() < 0) { + return -1; + } /* additional initialization can happen here */ - return m; + return 0; } The main disadvantage of this approach is that the file :file:`spammodule.h` is diff --git a/Doc/extending/index.rst b/Doc/extending/index.rst index 01b4df6d44acff..c1d8eb54ec1859 100644 --- a/Doc/extending/index.rst +++ b/Doc/extending/index.rst @@ -49,6 +49,10 @@ assistance from third party tools. It is intended primarily for creators of those tools, rather than being a recommended way to create your own C extensions. +.. seealso:: + + :pep:`489` -- Multi-phase extension module initialization + .. toctree:: :maxdepth: 2 :numbered: diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index 3fc91841416d71..f14690de4f86e8 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -55,8 +55,10 @@ from the previous chapter. This file defines three things: #. How the :class:`!Custom` **type** behaves: this is the ``CustomType`` struct, which defines a set of flags and function pointers that the interpreter inspects when specific operations are requested. -#. How to initialize the :mod:`!custom` module: this is the ``PyInit_custom`` - function and the associated ``custommodule`` struct. +#. How to define and execute the :mod:`!custom` module: this is the + ``PyInit_custom`` function and the associated ``custom_module`` struct for + defining the module, and the ``custom_module_exec`` function to set up + a fresh module object. The first bit is:: @@ -171,18 +173,18 @@ implementation provided by the API function :c:func:`PyType_GenericNew`. :: .tp_new = PyType_GenericNew, Everything else in the file should be familiar, except for some code in -:c:func:`!PyInit_custom`:: +:c:func:`!custom_module_exec`:: - if (PyType_Ready(&CustomType) < 0) - return; + if (PyType_Ready(&CustomType) < 0) { + return -1; + } This initializes the :class:`!Custom` type, filling in a number of members to the appropriate default values, including :c:member:`~PyObject.ob_type` that we initially set to ``NULL``. :: if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } This adds the type to the module dictionary. This allows us to create @@ -875,27 +877,22 @@ but let the base class handle it by calling its own :c:member:`~PyTypeObject.tp_ The :c:type:`PyTypeObject` struct supports a :c:member:`~PyTypeObject.tp_base` specifying the type's concrete base class. Due to cross-platform compiler issues, you can't fill that field directly with a reference to -:c:type:`PyList_Type`; it should be done later in the module initialization +:c:type:`PyList_Type`; it should be done in the :c:data:`Py_mod_exec` function:: - PyMODINIT_FUNC - PyInit_sublist(void) + static int + sublist_module_exec(PyObject *m) { - PyObject* m; SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Before calling :c:func:`PyType_Ready`, the type structure must have the diff --git a/Doc/includes/newtypes/custom.c b/Doc/includes/newtypes/custom.c index 5253f879360210..039a1a7219349c 100644 --- a/Doc/includes/newtypes/custom.c +++ b/Doc/includes/newtypes/custom.c @@ -16,28 +16,37 @@ static PyTypeObject CustomType = { .tp_new = PyType_GenericNew, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + // Just use this while using static types + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom2.c b/Doc/includes/newtypes/custom2.c index a87917583ca495..1ff8e707d1b0a0 100644 --- a/Doc/includes/newtypes/custom2.c +++ b/Doc/includes/newtypes/custom2.c @@ -106,28 +106,36 @@ static PyTypeObject CustomType = { .tp_methods = Custom_methods, }; -static PyModuleDef custommodule = { - .m_base =PyModuleDef_HEAD_INIT, +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom2", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom2(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom3.c b/Doc/includes/newtypes/custom3.c index 854034d4066d20..22f50eb0e1de89 100644 --- a/Doc/includes/newtypes/custom3.c +++ b/Doc/includes/newtypes/custom3.c @@ -151,28 +151,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom3", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom3(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom4.c b/Doc/includes/newtypes/custom4.c index a0a1eeb289190b..07585aff5987f3 100644 --- a/Doc/includes/newtypes/custom4.c +++ b/Doc/includes/newtypes/custom4.c @@ -170,28 +170,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom4", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom4(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/sublist.c b/Doc/includes/newtypes/sublist.c index 00664f3454156f..b784456a4ef667 100644 --- a/Doc/includes/newtypes/sublist.c +++ b/Doc/includes/newtypes/sublist.c @@ -31,7 +31,7 @@ SubList_init(PyObject *op, PyObject *args, PyObject *kwds) } static PyTypeObject SubListType = { - PyVarObject_HEAD_INIT(NULL, 0) + .ob_base = PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "sublist.SubList", .tp_doc = PyDoc_STR("SubList objects"), .tp_basicsize = sizeof(SubListObject), @@ -41,29 +41,37 @@ static PyTypeObject SubListType = { .tp_methods = SubList_methods, }; -static PyModuleDef sublistmodule = { - PyModuleDef_HEAD_INIT, +static int +sublist_module_exec(PyObject *m) +{ + SubListType.tp_base = &PyList_Type; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot sublist_module_slots[] = { + {Py_mod_exec, sublist_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef sublist_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "sublist", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = sublist_module_slots, }; PyMODINIT_FUNC PyInit_sublist(void) { - PyObject *m; - SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&sublist_module); } From 9bf03c6c30c9ee2979061410651c3f6132683640 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Mon, 26 May 2025 18:24:38 -0700 Subject: [PATCH 365/989] gh-134583: Update devcontainer reference to include image with libzstd-devel (gh-134765) Update devcontainer reference --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 64c85c1101e6e6..fd1d7151518f40 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "image": "ghcr.io/python/devcontainer:2024.09.25.11038928730", + "image": "ghcr.io/python/devcontainer:2025.05.25.15232270922", "onCreateCommand": [ // Install common tooling. "dnf", From 92ea1eb38ff97ac046a0031d505c30a51f58a43f Mon Sep 17 00:00:00 2001 From: Shamil Date: Tue, 27 May 2025 11:23:06 +0300 Subject: [PATCH 366/989] gh-134664: document `cleanup_socket` parameter in `asyncio.start_unix_server` (#134750) --- Doc/library/asyncio-stream.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Doc/library/asyncio-stream.rst b/Doc/library/asyncio-stream.rst index c56166cabb9093..90c90862ca1ed3 100644 --- a/Doc/library/asyncio-stream.rst +++ b/Doc/library/asyncio-stream.rst @@ -171,13 +171,17 @@ and work with streams: .. function:: start_unix_server(client_connected_cb, path=None, \ *, limit=None, sock=None, backlog=100, ssl=None, \ ssl_handshake_timeout=None, \ - ssl_shutdown_timeout=None, start_serving=True) + ssl_shutdown_timeout=None, start_serving=True, cleanup_socket=True) :async: Start a Unix socket server. Similar to :func:`start_server` but works with Unix sockets. + If *cleanup_socket* is true then the Unix socket will automatically + be removed from the filesystem when the server is closed, unless the + socket has been replaced after the server has been created. + See also the documentation of :meth:`loop.create_unix_server`. .. note:: @@ -198,6 +202,9 @@ and work with streams: .. versionchanged:: 3.11 Added the *ssl_shutdown_timeout* parameter. + .. versionchanged:: 3.13 + Added the *cleanup_socket* parameter. + StreamReader ============ From 8704d6b39139d2b1c3dd871590188fb7deb8aaad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Tue, 27 May 2025 10:25:12 +0200 Subject: [PATCH 367/989] gh-62824: Adjust test_alias_modules_exist test to use imports instead of file checks (#134777) --- Lib/test/test_codecs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 8c9a0972492294..d8666f7290e72e 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1,6 +1,7 @@ import codecs import contextlib import copy +import importlib import io import pickle import os @@ -3111,9 +3112,9 @@ def test_aliases(self): def test_alias_modules_exist(self): encodings_dir = os.path.dirname(encodings.__file__) for value in encodings.aliases.aliases.values(): - codec_file = os.path.join(encodings_dir, value + ".py") - self.assertTrue(os.path.isfile(codec_file), - "Codec file not found: " + codec_file) + codec_mod = f"encodings.{value}" + self.assertIsNotNone(importlib.util.find_spec(codec_mod), + f"Codec module not found: {codec_mod}") def test_quopri_stateless(self): # Should encode with quotetabs=True From 737b4ba020ecaf4b30d5a4c8f99882ce0001ddd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 May 2025 10:48:34 +0200 Subject: [PATCH 368/989] gh-134635: add `zlib.{adler32,crc32}_combine` to combine checksums (#134650) --- Doc/library/zlib.rst | 28 ++++ Doc/whatsnew/3.15.rst | 10 ++ Lib/test/test_zlib.py | 108 ++++++++++++++++ ...-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst | 3 + Modules/clinic/zlibmodule.c.h | 120 +++++++++++++++++- Modules/zlibmodule.c | 88 +++++++++++++ 6 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 75ead3c4cb144c..7c5e9b086e170d 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -44,6 +44,20 @@ The available exception and functions in this module are: .. versionchanged:: 3.0 The result is always unsigned. +.. function:: adler32_combine(adler1, adler2, len2, /) + + Combine two Adler-32 checksums into one. + + Given the Adler-32 checksum *adler1* of a sequence ``A`` and the + Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*, + return the Adler-32 checksum of ``A`` and ``B`` concatenated. + + This function is typically useful to combine Adler-32 checksums + that were concurrently computed. To compute checksums sequentially, use + :func:`adler32` with the running checksum as the ``value`` argument. + + .. versionadded:: next + .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) Compresses the bytes in *data*, returning a bytes object containing compressed data. @@ -136,6 +150,20 @@ The available exception and functions in this module are: .. versionchanged:: 3.0 The result is always unsigned. +.. function:: crc32_combine(crc1, crc2, len2, /) + + Combine two CRC-32 checksums into one. + + Given the CRC-32 checksum *crc1* of a sequence ``A`` and the + CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*, + return the CRC-32 checksum of ``A`` and ``B`` concatenated. + + This function is typically useful to combine CRC-32 checksums + that were concurrently computed. To compute checksums sequentially, use + :func:`crc32` with the running checksum as the ``value`` argument. + + .. versionadded:: next + .. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) Decompresses the bytes in *data*, returning a bytes object containing the diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index bf186c191b04d1..cd4b2e8b3dd8ed 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -97,6 +97,16 @@ ssl (Contributed by Will Childs-Klein in :gh:`133624`.) +zlib +---- + +* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`. + (Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.) + +* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`. + (Contributed by Bénédikt Tran in :gh:`134635`.) + + .. Add improved modules above alphabetically, not here at the end. Optimizations diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 4d97fe56f3a094..c57ab51eca16b4 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -119,6 +119,114 @@ def test_same_as_binascii_crc32(self): self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam')) +class ChecksumCombineMixin: + """Mixin class for testing checksum combination.""" + + N = 1000 + default_iv: int + + def parse_iv(self, iv): + """Parse an IV value. + + - The default IV is returned if *iv* is None. + - A random IV is returned if *iv* is -1. + - Otherwise, *iv* is returned as is. + """ + if iv is None: + return self.default_iv + if iv == -1: + return random.randint(1, 0x80000000) + return iv + + def checksum(self, data, init=None): + """Compute the checksum of data with a given initial value. + + The *init* value is parsed by ``parse_iv``. + """ + iv = self.parse_iv(init) + return self._checksum(data, iv) + + def _checksum(self, data, init): + raise NotImplementedError + + def combine(self, a, b, blen): + """Combine two checksums together.""" + raise NotImplementedError + + def get_random_data(self, data_len, *, iv=None): + """Get a triplet (data, iv, checksum).""" + data = random.randbytes(data_len) + init = self.parse_iv(iv) + checksum = self.checksum(data, init) + return data, init, checksum + + def test_combine_empty(self): + for _ in range(self.N): + a, iv, checksum = self.get_random_data(32, iv=-1) + res = self.combine(iv, self.checksum(a), len(a)) + self.assertEqual(res, checksum) + + def test_combine_no_iv(self): + for _ in range(self.N): + a, _, chk_a = self.get_random_data(32) + b, _, chk_b = self.get_random_data(64) + res = self.combine(chk_a, chk_b, len(b)) + self.assertEqual(res, self.checksum(a + b)) + + def test_combine_no_iv_invalid_length(self): + a, _, chk_a = self.get_random_data(32) + b, _, chk_b = self.get_random_data(64) + checksum = self.checksum(a + b) + for invalid_len in [1, len(a), 48, len(b) + 1, 191]: + invalid_res = self.combine(chk_a, chk_b, invalid_len) + self.assertNotEqual(invalid_res, checksum) + + self.assertRaises(TypeError, self.combine, 0, 0, "len") + + def test_combine_with_iv(self): + for _ in range(self.N): + a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1) + chk_a_no_iv = self.checksum(a) + b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1) + chk_b_no_iv = self.checksum(b) + + # We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as: + # + # c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b)) + # = COMBINE( + # COMBINE(CHK(b'', iv_a), CHK(a)), + # COMBINE(CHK(b'', iv_b), CHK(b)), + # ) + # = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b))) + tmp0 = self.combine(iv_a, chk_a_no_iv, len(a)) + tmp1 = self.combine(iv_b, chk_b_no_iv, len(b)) + expected = self.combine(tmp0, tmp1, len(b)) + checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b)) + self.assertEqual(checksum, expected) + + +class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase): + + default_iv = 0 + + def _checksum(self, data, init): + return zlib.crc32(data, init) + + def combine(self, a, b, blen): + return zlib.crc32_combine(a, b, blen) + + +class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase): + + default_iv = 1 + + def _checksum(self, data, init): + return zlib.adler32(data, init) + + def combine(self, a, b, blen): + return zlib.adler32_combine(a, b, blen) + + # Issue #10276 - check that inputs >=4 GiB are handled correctly. class ChecksumBigBufferTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst new file mode 100644 index 00000000000000..4cabbf2f896917 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst @@ -0,0 +1,3 @@ +:mod:`zlib`: Allow to combine Adler-32 and CRC-32 checksums via +:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by +Callum Attryde and Bénédikt Tran. diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 2710f65a840db9..146a7e250019f0 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1044,6 +1044,65 @@ zlib_adler32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(zlib_adler32_combine__doc__, +"adler32_combine($module, adler1, adler2, len2, /)\n" +"--\n" +"\n" +"Combine two Adler-32 checksums into one.\n" +"\n" +" adler1\n" +" Adler-32 checksum for sequence A\n" +" adler2\n" +" Adler-32 checksum for sequence B\n" +" len2\n" +" Length of sequence B\n" +"\n" +"Given the Adler-32 checksum \'adler1\' of a sequence A and the\n" +"Adler-32 checksum \'adler2\' of a sequence B of length \'len2\',\n" +"return the Adler-32 checksum of A and B concatenated."); + +#define ZLIB_ADLER32_COMBINE_METHODDEF \ + {"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__}, + +static unsigned int +zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, + unsigned int adler2, PyObject *len2); + +static PyObject * +zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + unsigned int adler1; + unsigned int adler2; + PyObject *len2; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) { + goto exit; + } + adler1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); + if (adler1 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + adler2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (adler2 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + if (!PyLong_Check(args[2])) { + _PyArg_BadArgument("adler32_combine", "argument 3", "int", args[2]); + goto exit; + } + len2 = args[2]; + _return_value = zlib_adler32_combine_impl(module, adler1, adler2, len2); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(zlib_crc32__doc__, "crc32($module, data, value=0, /)\n" "--\n" @@ -1098,6 +1157,65 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(zlib_crc32_combine__doc__, +"crc32_combine($module, crc1, crc2, len2, /)\n" +"--\n" +"\n" +"Combine two CRC-32 checksums into one.\n" +"\n" +" crc1\n" +" CRC-32 checksum for sequence A\n" +" crc2\n" +" CRC-32 checksum for sequence B\n" +" len2\n" +" Length of sequence B\n" +"\n" +"Given the CRC-32 checksum \'crc1\' of a sequence A and the\n" +"CRC-32 checksum \'crc2\' of a sequence B of length \'len2\',\n" +"return the CRC-32 checksum of A and B concatenated."); + +#define ZLIB_CRC32_COMBINE_METHODDEF \ + {"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__}, + +static unsigned int +zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, + unsigned int crc2, PyObject *len2); + +static PyObject * +zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + unsigned int crc1; + unsigned int crc2; + PyObject *len2; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) { + goto exit; + } + crc1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); + if (crc1 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + crc2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (crc2 == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + if (!PyLong_Check(args[2])) { + _PyArg_BadArgument("crc32_combine", "argument 3", "int", args[2]); + goto exit; + } + len2 = args[2]; + _return_value = zlib_crc32_combine_impl(module, crc1, crc2, len2); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + return return_value; +} + #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ @@ -1121,4 +1239,4 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=33938c7613a8c1c7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3f7692eb3b5d5a0c input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index d4b4b91697c08e..f7009364644b7e 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -17,6 +17,16 @@ #error "At least zlib version 1.2.2.1 is required" #endif +#if (SIZEOF_OFF_T == SIZEOF_SIZE_T) +# define convert_to_z_off_t PyLong_AsSsize_t +#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG) +# define convert_to_z_off_t PyLong_AsLongLong +#elif (SIZEOF_OFF_T == SIZEOF_LONG) +# define convert_to_z_off_t PyLong_AsLong +#else +# error off_t does not match either size_t, long, or long long! +#endif + // Blocks output buffer wrappers #include "pycore_blocks_output_buffer.h" @@ -1876,6 +1886,44 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value) return PyLong_FromUnsignedLong(value & 0xffffffffU); } +/*[clinic input] +zlib.adler32_combine -> unsigned_int + + adler1: unsigned_int(bitwise=True) + Adler-32 checksum for sequence A + + adler2: unsigned_int(bitwise=True) + Adler-32 checksum for sequence B + + len2: object(subclass_of='&PyLong_Type') + Length of sequence B + / + +Combine two Adler-32 checksums into one. + +Given the Adler-32 checksum 'adler1' of a sequence A and the +Adler-32 checksum 'adler2' of a sequence B of length 'len2', +return the Adler-32 checksum of A and B concatenated. +[clinic start generated code]*/ + +static unsigned int +zlib_adler32_combine_impl(PyObject *module, unsigned int adler1, + unsigned int adler2, PyObject *len2) +/*[clinic end generated code: output=61842cefb16afb1b input=51bb045c95130c6f]*/ +{ +#if defined(Z_WANT64) + z_off64_t len = convert_to_z_off_t(len2); +#else + z_off_t len = convert_to_z_off_t(len2); +#endif + if (PyErr_Occurred()) { + return (unsigned int)-1; + } + return adler32_combine(adler1, adler2, len); +} + + + /*[clinic input] zlib.crc32 -> unsigned_int @@ -1923,13 +1971,50 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) return value; } +/*[clinic input] +zlib.crc32_combine -> unsigned_int + + crc1: unsigned_int(bitwise=True) + CRC-32 checksum for sequence A + + crc2: unsigned_int(bitwise=True) + CRC-32 checksum for sequence B + + len2: object(subclass_of='&PyLong_Type') + Length of sequence B + / + +Combine two CRC-32 checksums into one. + +Given the CRC-32 checksum 'crc1' of a sequence A and the +CRC-32 checksum 'crc2' of a sequence B of length 'len2', +return the CRC-32 checksum of A and B concatenated. +[clinic start generated code]*/ + +static unsigned int +zlib_crc32_combine_impl(PyObject *module, unsigned int crc1, + unsigned int crc2, PyObject *len2) +/*[clinic end generated code: output=c4def907c602e6eb input=9c8a065d9040dc66]*/ +{ +#if defined(Z_WANT64) + z_off64_t len = convert_to_z_off_t(len2); +#else + z_off_t len = convert_to_z_off_t(len2); +#endif + if (PyErr_Occurred()) { + return (unsigned int)-1; + } + return crc32_combine(crc1, crc2, len); +} static PyMethodDef zlib_methods[] = { ZLIB_ADLER32_METHODDEF + ZLIB_ADLER32_COMBINE_METHODDEF ZLIB_COMPRESS_METHODDEF ZLIB_COMPRESSOBJ_METHODDEF ZLIB_CRC32_METHODDEF + ZLIB_CRC32_COMBINE_METHODDEF ZLIB_DECOMPRESS_METHODDEF ZLIB_DECOMPRESSOBJ_METHODDEF {NULL, NULL} @@ -1981,14 +2066,17 @@ static PyType_Spec ZlibDecompressor_type_spec = { .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), .slots = ZlibDecompressor_type_slots, }; + PyDoc_STRVAR(zlib_module_documentation, "The functions in this module allow compression and decompression using the\n" "zlib library, which is based on GNU zip.\n" "\n" "adler32(string[, start]) -- Compute an Adler-32 checksum.\n" +"adler32_combine(adler1, adler2, len2, /) -- Combine two Adler-32 checksums.\n" "compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n" "compressobj([level[, ...]]) -- Return a compressor object.\n" "crc32(string[, start]) -- Compute a CRC-32 checksum.\n" +"crc32_combine(crc1, crc2, len2, /) -- Combine two CRC-32 checksums.\n" "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n" "decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n" "\n" From 579686d9fb1bccc74c694d569f0a8bf28d9ca85a Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 27 May 2025 04:49:28 -0400 Subject: [PATCH 369/989] gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. (#134766) --- Lib/test/test_tokenize.py | 72 ++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index d4b51841891b28..865e0c5b40ddd3 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -3241,39 +3241,40 @@ def test_exact_flag(self): class StringPrefixTest(unittest.TestCase): - def test_prefixes(self): - # Get the list of defined string prefixes. I don't see an - # obvious documented way of doing this, but probably the best - # thing is to split apart tokenize.StringPrefix. - - # Make sure StringPrefix begins and ends in parens. - self.assertEqual(tokenize.StringPrefix[0], '(') - self.assertEqual(tokenize.StringPrefix[-1], ')') - - # Then split apart everything else by '|'. - defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|')) - - # Now compute the actual string prefixes, by exec-ing all - # valid prefix combinations, followed by an empty string. - - # Try all prefix lengths until we find a length that has zero - # valid prefixes. This will miss the case where for example - # there are no valid 3 character prefixes, but there are valid - # 4 character prefixes. That seems extremely unlikely. - - # Note that the empty prefix is being included, because length - # starts at 0. That's expected, since StringPrefix includes - # the empty prefix. + @staticmethod + def determine_valid_prefixes(): + # Try all lengths until we find a length that has zero valid + # prefixes. This will miss the case where for example there + # are no valid 3 character prefixes, but there are valid 4 + # character prefixes. That seems unlikely. + + single_char_valid_prefixes = set() + + # Find all of the single character string prefixes. Just get + # the lowercase version, we'll deal with combinations of upper + # and lower case later. I'm using this logic just in case + # some uppercase-only prefix is added. + for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase): + try: + eval(f'{letter}""') + single_char_valid_prefixes.add(letter.lower()) + except SyntaxError: + pass + # This logic assumes that all combinations of valid prefixes only use + # the characters that are valid single character prefixes. That seems + # like a valid assumption, but if it ever changes this will need + # adjusting. valid_prefixes = set() for length in itertools.count(): num_at_this_length = 0 for prefix in ( - "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length)) + "".join(l) + for l in itertools.combinations(single_char_valid_prefixes, length) ): for t in itertools.permutations(prefix): for u in itertools.product(*[(c, c.upper()) for c in t]): - p = ''.join(u) + p = "".join(u) if p == "not": # 'not' can never be a string prefix, # because it's a valid expression: not "" @@ -3289,9 +3290,26 @@ def test_prefixes(self): except SyntaxError: pass if num_at_this_length == 0: - break + return valid_prefixes + + + def test_prefixes(self): + # Get the list of defined string prefixes. I don't see an + # obvious documented way of doing this, but probably the best + # thing is to split apart tokenize.StringPrefix. + + # Make sure StringPrefix begins and ends in parens. We're + # assuming it's of the form "(a|b|ab)", if a, b, and cd are + # valid string prefixes. + self.assertEqual(tokenize.StringPrefix[0], '(') + self.assertEqual(tokenize.StringPrefix[-1], ')') + + # Then split apart everything else by '|'. + defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|')) - self.assertEqual(defined_prefixes, valid_prefixes) + # Now compute the actual allowed string prefixes and compare + # to what is defined in the tokenize module. + self.assertEqual(defined_prefixes, self.determine_valid_prefixes()) if __name__ == "__main__": From 51762b6cadb8f316dd783716bc5c168c2e2d07f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 May 2025 10:51:05 +0200 Subject: [PATCH 370/989] gh-134210: handle signals in `_curses.window.getch` (#134326) --- ...5-05-24-13-10-35.gh-issue-134210.0IuMY2.rst | 2 ++ Modules/_cursesmodule.c | 18 ++++++++++++++---- Modules/clinic/_cursesmodule.c.h | 11 +++-------- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst b/Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst new file mode 100644 index 00000000000000..b440e8308db6a2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-24-13-10-35.gh-issue-134210.0IuMY2.rst @@ -0,0 +1,2 @@ +:func:`curses.window.getch` now correctly handles signals. Patch by Bénédikt +Tran. diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 55d664cebe31ec..27469d75079f45 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1656,7 +1656,7 @@ _curses_window_getbkgd_impl(PyCursesWindowObject *self) } /*[clinic input] -_curses.window.getch -> int +_curses.window.getch [ y: int @@ -1673,10 +1673,10 @@ keypad keys and so on return numbers higher than 256. In no-delay mode, -1 is returned if there is no input, else getch() waits until a key is pressed. [clinic start generated code]*/ -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x) -/*[clinic end generated code: output=980aa6af0c0ca387 input=bb24ebfb379f991f]*/ +/*[clinic end generated code: output=e1639e87d545e676 input=73f350336b1ee8c8]*/ { int rtn; @@ -1689,7 +1689,17 @@ _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, } Py_END_ALLOW_THREADS - return rtn; + if (rtn == ERR) { + // We suppress ERR returned by wgetch() in nodelay mode + // after we handled possible interruption signals. + if (PyErr_CheckSignals()) { + return NULL; + } + // ERR is an implementation detail, so to be on the safe side, + // we forcibly set the return value to -1 as documented above. + rtn = -1; + } + return PyLong_FromLong(rtn); } /*[clinic input] diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index 552360eb80a545..a898a7e17cf8d1 100644 --- a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -768,7 +768,7 @@ PyDoc_STRVAR(_curses_window_getch__doc__, #define _CURSES_WINDOW_GETCH_METHODDEF \ {"getch", (PyCFunction)_curses_window_getch, METH_VARARGS, _curses_window_getch__doc__}, -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x); @@ -779,7 +779,6 @@ _curses_window_getch(PyObject *self, PyObject *args) int group_right_1 = 0; int y = 0; int x = 0; - int _return_value; switch (PyTuple_GET_SIZE(args)) { case 0: @@ -794,11 +793,7 @@ _curses_window_getch(PyObject *self, PyObject *args) PyErr_SetString(PyExc_TypeError, "_curses.window.getch requires 0 to 2 arguments"); goto exit; } - _return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); + return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); exit: return return_value; @@ -4440,4 +4435,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=42b2923d88c8d0f6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7753612d7613903c input=a9049054013a1b77]*/ From 604f83550bc4fd5507019969247c12a6a6454ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 May 2025 10:57:41 +0200 Subject: [PATCH 371/989] gh-134210: refactor signal handling in `_curses.window.{get_wch,getkey}` (#134646) --- Modules/_cursesmodule.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 27469d75079f45..21c2509efe816a 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1655,6 +1655,21 @@ _curses_window_getbkgd_impl(PyCursesWindowObject *self) return (long) getbkgd(self->win); } +static PyObject * +curses_check_signals_on_input_error(PyCursesWindowObject *self, + const char *curses_funcname, + const char *python_funcname) +{ + assert(!PyErr_Occurred()); + if (PyErr_CheckSignals()) { + return NULL; + } + cursesmodule_state *state = get_cursesmodule_state_by_win(self); + PyErr_Format(state->error, "%s() (called by %s()): no input", + curses_funcname, python_funcname); + return NULL; +} + /*[clinic input] _curses.window.getch @@ -1737,14 +1752,9 @@ _curses_window_getkey_impl(PyCursesWindowObject *self, int group_right_1, Py_END_ALLOW_THREADS if (rtn == ERR) { - /* getch() returns ERR in nodelay mode */ - PyErr_CheckSignals(); - if (!PyErr_Occurred()) { - cursesmodule_state *state = get_cursesmodule_state_by_win(self); - const char *funcname = group_right_1 ? "mvwgetch" : "wgetch"; - PyErr_Format(state->error, "getkey(): %s(): no input", funcname); - } - return NULL; + /* wgetch() returns ERR in nodelay mode */ + const char *funcname = group_right_1 ? "mvwgetch" : "wgetch"; + return curses_check_signals_on_input_error(self, funcname, "getkey"); } else if (rtn <= 255) { #ifdef NCURSES_VERSION_MAJOR #if NCURSES_VERSION_MAJOR*100+NCURSES_VERSION_MINOR <= 507 @@ -1797,14 +1807,9 @@ _curses_window_get_wch_impl(PyCursesWindowObject *self, int group_right_1, Py_END_ALLOW_THREADS if (ct == ERR) { - if (PyErr_CheckSignals()) - return NULL; - - /* get_wch() returns ERR in nodelay mode */ - cursesmodule_state *state = get_cursesmodule_state_by_win(self); + /* wget_wch() returns ERR in nodelay mode */ const char *funcname = group_right_1 ? "mvwget_wch" : "wget_wch"; - PyErr_Format(state->error, "get_wch(): %s(): no input", funcname); - return NULL; + return curses_check_signals_on_input_error(self, funcname, "get_wch"); } if (ct == KEY_CODE_YES) return PyLong_FromLong(rtn); From 30dde1eeb3fa1e0e7417f9cdded8fd90766f2559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 May 2025 12:15:16 +0200 Subject: [PATCH 372/989] gh-133579: consistently report C curses function failures (#134327) Some curses module-level functions and window methods now raise a `curses.error` when a call to a C curses function fails: - Module-level functions: assume_default_colors, baudrate, cbreak, echo, longname, initscr, nl, raw, termattrs, termname, and unctrl. - Window methods: addch, addnstr, addstr, border, box, chgat, getbkgd, inch, insstr, and insnstr. In addition, `curses.window.refresh` and `curses.window.noutrefresh` now raise a `TypeError` instead of a `curses.error` when called with an incorrect number of arguments for pads. See also ee36db550076e5a9185444ffbc53eaf8157ef04c for similar changes. --- Doc/howto/curses.rst | 2 + ...-05-27-11-13-51.gh-issue-133579.KY9M6S.rst | 8 + ...-05-27-11-18-13.gh-issue-133579.ohtgdC.rst | 3 + ...-05-27-11-24-38.gh-issue-133579.WGPUC1.rst | 7 + Modules/_cursesmodule.c | 240 ++++++++++++------ Modules/clinic/_cursesmodule.c.h | 25 +- 6 files changed, 192 insertions(+), 93 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-27-11-13-51.gh-issue-133579.KY9M6S.rst create mode 100644 Misc/NEWS.d/next/Library/2025-05-27-11-18-13.gh-issue-133579.ohtgdC.rst create mode 100644 Misc/NEWS.d/next/Library/2025-05-27-11-24-38.gh-issue-133579.WGPUC1.rst diff --git a/Doc/howto/curses.rst b/Doc/howto/curses.rst index 6994a5328e8149..816639552d7cd6 100644 --- a/Doc/howto/curses.rst +++ b/Doc/howto/curses.rst @@ -161,6 +161,8 @@ your terminal won't be left in a funny state on exception and you'll be able to read the exception's message and traceback. +.. _windows-and-pads: + Windows and Pads ================ diff --git a/Misc/NEWS.d/next/Library/2025-05-27-11-13-51.gh-issue-133579.KY9M6S.rst b/Misc/NEWS.d/next/Library/2025-05-27-11-13-51.gh-issue-133579.KY9M6S.rst new file mode 100644 index 00000000000000..129d5d9842576b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-27-11-13-51.gh-issue-133579.KY9M6S.rst @@ -0,0 +1,8 @@ +:ref:`curses.window `: Consistently report failures +of curses C API calls in Window methods by raising a :exc:`curses.error`. +This affects :meth:`~curses.window.addch`, :meth:`~curses.window.addnstr`, +:meth:`~curses.window.addstr`, :meth:`~curses.window.border`, +:meth:`~curses.window.box`, :meth:`~curses.window.chgat`, +:meth:`~curses.window.getbkgd`, :meth:`~curses.window.inch`, +:meth:`~curses.window.insstr` and :meth:`~curses.window.insnstr`. +Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/Library/2025-05-27-11-18-13.gh-issue-133579.ohtgdC.rst b/Misc/NEWS.d/next/Library/2025-05-27-11-18-13.gh-issue-133579.ohtgdC.rst new file mode 100644 index 00000000000000..e0ef959f125d62 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-27-11-18-13.gh-issue-133579.ohtgdC.rst @@ -0,0 +1,3 @@ +:meth:`curses.window.refresh` and :meth:`curses.window.noutrefresh` now raise +a :exc:`TypeError` instead of :exc:`curses.error` when called with an incorrect +number of arguments for :ref:`pads `. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/Library/2025-05-27-11-24-38.gh-issue-133579.WGPUC1.rst b/Misc/NEWS.d/next/Library/2025-05-27-11-24-38.gh-issue-133579.WGPUC1.rst new file mode 100644 index 00000000000000..552b7ca1a7187d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-27-11-24-38.gh-issue-133579.WGPUC1.rst @@ -0,0 +1,7 @@ +:mod:`curses`: Consistently report failures of curses C API calls in +module-level methods by raising a :exc:`curses.error`. This affects +:func:`~curses.assume_default_colors`, :func:`~curses.baudrate`, +:func:`~curses.cbreak`, :func:`~curses.echo`, :func:`~curses.longname`, +:func:`~curses.initscr`, :func:`~curses.nl`, :func:`~curses.raw`, +:func:`~curses.termattrs`, :func:`~curses.termname` and :func:`~curses.unctrl`. +Patch by Bénédikt Tran. diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 21c2509efe816a..290ae4e55cd7a7 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -932,8 +932,10 @@ PyCursesWindow_dealloc(PyObject *self) PyObject_GC_UnTrack(self); PyCursesWindowObject *wo = (PyCursesWindowObject *)self; if (wo->win != stdscr && wo->win != NULL) { - // silently ignore errors in delwin(3) - (void)delwin(wo->win); + if (delwin(wo->win) == ERR) { + curses_window_set_error(wo, "delwin", "__del__"); + PyErr_FormatUnraisable("Exception ignored in delwin()"); + } } if (wo->encoding != NULL) { PyMem_Free(wo->encoding); @@ -1001,7 +1003,11 @@ _curses_window_addch_impl(PyCursesWindowObject *self, int group_left_1, type = PyCurses_ConvertToCchar_t(self, ch, &cch, wstr); if (type == 2) { wstr[1] = L'\0'; - setcchar(&wcval, wstr, attr, PAIR_NUMBER(attr), NULL); + rtn = setcchar(&wcval, wstr, attr, PAIR_NUMBER(attr), NULL); + if (rtn == ERR) { + curses_window_set_error(self, "setcchar", "addch"); + return NULL; + } if (coordinates_group) { rtn = mvwadd_wch(self->win,y,x, &wcval); funcname = "mvwadd_wch"; @@ -1031,6 +1037,27 @@ _curses_window_addch_impl(PyCursesWindowObject *self, int group_left_1, return curses_window_check_err(self, rtn, funcname, "addch"); } +#ifdef HAVE_NCURSESW +#define curses_release_wstr(STRTYPE, WSTR) \ + do { \ + if ((STRTYPE) == 2) { \ + PyMem_Free((WSTR)); \ + } \ + } while (0) +#else +#define curses_release_wstr(_STRTYPE, _WSTR) +#endif + +static int +curses_wattrset(PyCursesWindowObject *self, long attr, const char *funcname) +{ + if (wattrset(self->win, attr) == ERR) { + curses_window_set_error(self, "wattrset", funcname); + return -1; + } + return 0; +} + /*[clinic input] _curses.window.addstr @@ -1084,7 +1111,10 @@ _curses_window_addstr_impl(PyCursesWindowObject *self, int group_left_1, } if (use_attr) { attr_old = getattrs(self->win); - (void)wattrset(self->win,attr); + if (curses_wattrset(self, attr, "addstr") < 0) { + curses_release_wstr(strtype, wstr); + return NULL; + } } #ifdef HAVE_NCURSESW if (strtype == 2) { @@ -1112,9 +1142,15 @@ _curses_window_addstr_impl(PyCursesWindowObject *self, int group_left_1, } Py_DECREF(bytesobj); } - if (use_attr) - (void)wattrset(self->win,attr_old); - return curses_window_check_err(self, rtn, funcname, "addstr"); + if (rtn == ERR) { + curses_window_set_error(self, funcname, "addstr"); + return NULL; + } + if (use_attr) { + rtn = wattrset(self->win, attr_old); + return curses_window_check_err(self, rtn, "wattrset", "addstr"); + } + Py_RETURN_NONE; } /*[clinic input] @@ -1173,7 +1209,10 @@ _curses_window_addnstr_impl(PyCursesWindowObject *self, int group_left_1, if (use_attr) { attr_old = getattrs(self->win); - (void)wattrset(self->win,attr); + if (curses_wattrset(self, attr, "addnstr") < 0) { + curses_release_wstr(strtype, wstr); + return NULL; + } } #ifdef HAVE_NCURSESW if (strtype == 2) { @@ -1201,9 +1240,15 @@ _curses_window_addnstr_impl(PyCursesWindowObject *self, int group_left_1, } Py_DECREF(bytesobj); } - if (use_attr) - (void)wattrset(self->win,attr_old); - return curses_window_check_err(self, rtn, funcname, "addnstr"); + if (rtn == ERR) { + curses_window_set_error(self, funcname, "addnstr"); + return NULL; + } + if (use_attr) { + rtn = wattrset(self->win, attr_old); + return curses_window_check_err(self, rtn, "wattrset", "addnstr"); + } + Py_RETURN_NONE; } /*[clinic input] @@ -1345,7 +1390,7 @@ _curses_window_border_impl(PyCursesWindowObject *self, PyObject *ls, /*[clinic end generated code: output=670ef38d3d7c2aa3 input=e015f735d67a240b]*/ { chtype ch[8]; - int i; + int i, rtn; /* Clear the array of parameters */ for(i=0; i<8; i++) @@ -1366,10 +1411,10 @@ _curses_window_border_impl(PyCursesWindowObject *self, PyObject *ls, #undef CONVERTTOCHTYPE - wborder(self->win, - ch[0], ch[1], ch[2], ch[3], - ch[4], ch[5], ch[6], ch[7]); - Py_RETURN_NONE; + rtn = wborder(self->win, + ch[0], ch[1], ch[2], ch[3], + ch[4], ch[5], ch[6], ch[7]); + return curses_window_check_err(self, rtn, "wborder", "border"); } /*[clinic input] @@ -1403,8 +1448,7 @@ _curses_window_box_impl(PyCursesWindowObject *self, int group_right_1, return NULL; } } - box(self->win,ch1,ch2); - Py_RETURN_NONE; + return curses_window_check_err(self, box(self->win, ch1, ch2), "box", NULL); } #if defined(HAVE_NCURSES_H) || defined(MVWDELCH_IS_EXPRESSION) @@ -1498,15 +1542,18 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) if (use_xy) { rtn = mvwchgat(self->win,y,x,num,attr,color,NULL); - touchline(self->win,y,1); funcname = "mvwchgat"; } else { getyx(self->win,y,x); rtn = wchgat(self->win,num,attr,color,NULL); - touchline(self->win,y,1); funcname = "wchgat"; } - return curses_window_check_err(self, rtn, funcname, "chgat"); + if (rtn == ERR) { + curses_window_set_error(self, funcname, "chgat"); + return NULL; + } + rtn = touchline(self->win,y,1); + return curses_window_check_err(self, rtn, "touchline", "chgat"); } #endif @@ -1643,16 +1690,21 @@ _curses_window_enclose_impl(PyCursesWindowObject *self, int y, int x) #endif /*[clinic input] -_curses.window.getbkgd -> long +_curses.window.getbkgd Return the window's current background character/attribute pair. [clinic start generated code]*/ -static long +static PyObject * _curses_window_getbkgd_impl(PyCursesWindowObject *self) -/*[clinic end generated code: output=c52b25dc16b215c3 input=a69db882fa35426c]*/ +/*[clinic end generated code: output=3ff953412b0e6028 input=7cf1f59a31f89df4]*/ { - return (long) getbkgd(self->win); + chtype rtn = getbkgd(self->win); + if (rtn == (chtype)ERR) { + curses_window_set_error(self, "getbkgd", NULL); + return NULL; + } + return PyLong_FromLong(rtn); } static PyObject * @@ -2011,7 +2063,7 @@ _curses_window_insch_impl(PyCursesWindowObject *self, int group_left_1, } /*[clinic input] -_curses.window.inch -> unsigned_long +_curses.window.inch [ y: int @@ -2026,21 +2078,27 @@ Return the character at the given position in the window. The bottom 8 bits are the character proper, and upper bits are the attributes. [clinic start generated code]*/ -static unsigned long +static PyObject * _curses_window_inch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x) -/*[clinic end generated code: output=6c4719fe978fe86a input=fac23ee11e3b3a66]*/ +/*[clinic end generated code: output=97ca8581baaafd06 input=4b4fb43d85b177c3]*/ { - unsigned long rtn; + chtype rtn; + const char *funcname; if (!group_right_1) { rtn = winch(self->win); + funcname = "winch"; } else { rtn = mvwinch(self->win, y, x); + funcname = "mvwinch"; } - - return rtn; + if (rtn == (chtype)ERR) { + curses_window_set_error(self, funcname, "inch"); + return NULL; + } + return PyLong_FromUnsignedLong(rtn); } PyDoc_STRVAR(_curses_window_instr__doc__, @@ -2150,7 +2208,10 @@ _curses_window_insstr_impl(PyCursesWindowObject *self, int group_left_1, if (use_attr) { attr_old = getattrs(self->win); - (void)wattrset(self->win, (attr_t)attr); + if (curses_wattrset(self, attr, "insstr") < 0) { + curses_release_wstr(strtype, wstr); + return NULL; + } } #ifdef HAVE_NCURSESW if (strtype == 2) { @@ -2178,9 +2239,15 @@ _curses_window_insstr_impl(PyCursesWindowObject *self, int group_left_1, } Py_DECREF(bytesobj); } - if (use_attr) - (void)wattrset(self->win,attr_old); - return curses_window_check_err(self, rtn, funcname, "insstr"); + if (rtn == ERR) { + curses_window_set_error(self, funcname, "insstr"); + return NULL; + } + if (use_attr) { + rtn = wattrset(self->win, attr_old); + return curses_window_check_err(self, rtn, "wattrset", "insstr"); + } + Py_RETURN_NONE; } /*[clinic input] @@ -2241,7 +2308,10 @@ _curses_window_insnstr_impl(PyCursesWindowObject *self, int group_left_1, if (use_attr) { attr_old = getattrs(self->win); - (void)wattrset(self->win, (attr_t)attr); + if (curses_wattrset(self, attr, "insnstr") < 0) { + curses_release_wstr(strtype, wstr); + return NULL; + } } #ifdef HAVE_NCURSESW if (strtype == 2) { @@ -2269,9 +2339,15 @@ _curses_window_insnstr_impl(PyCursesWindowObject *self, int group_left_1, } Py_DECREF(bytesobj); } - if (use_attr) - (void)wattrset(self->win,attr_old); - return curses_window_check_err(self, rtn, funcname, "insnstr"); + if (rtn == ERR) { + curses_window_set_error(self, funcname, "insnstr"); + return NULL; + } + if (use_attr) { + rtn = wattrset(self->win, attr_old); + return curses_window_check_err(self, rtn, "wattrset", "insnstr"); + } + Py_RETURN_NONE; } /*[clinic input] @@ -2347,8 +2423,7 @@ _curses_window_noutrefresh_impl(PyCursesWindowObject *self) #ifdef py_is_pad if (py_is_pad(self->win)) { if (!group_right_1) { - cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, + PyErr_SetString(PyExc_TypeError, "noutrefresh() called for a pad " "requires 6 arguments"); return NULL; @@ -2574,8 +2649,7 @@ _curses_window_refresh_impl(PyCursesWindowObject *self, int group_right_1, #ifdef py_is_pad if (py_is_pad(self->win)) { if (!group_right_1) { - cursesmodule_state *state = get_cursesmodule_state_by_win(self); - PyErr_SetString(state->error, + PyErr_SetString(PyExc_TypeError, "refresh() for a pad requires 6 arguments"); return NULL; } @@ -2964,12 +3038,15 @@ static PyType_Spec PyCursesWindow_Type_spec = { * * These macros should only be used for generating the body of * the module's methods since they need a module reference. + * + * The Python function name must be the same as the curses function name (X). */ -#define NoArgNoReturnFunctionBody(X) \ -{ \ - PyCursesStatefulInitialised(module); \ - return curses_check_err(module, X(), # X, NULL); } +#define NoArgNoReturnFunctionBody(X) \ +{ \ + PyCursesStatefulInitialised(module); \ + return curses_check_err(module, X(), # X, NULL); \ +} #define NoArgOrFlagNoReturnFunctionBody(X, FLAG) \ { \ @@ -2987,26 +3064,40 @@ static PyType_Spec PyCursesWindow_Type_spec = { return curses_check_err(module, rtn, funcname, # X); \ } -#define NoArgReturnIntFunctionBody(X) \ -{ \ - PyCursesStatefulInitialised(module); \ - return PyLong_FromLong((long) X()); } +#define NoArgReturnIntFunctionBody(X) \ +{ \ + PyCursesStatefulInitialised(module); \ + int rtn = X(); \ + if (rtn == ERR) { \ + curses_set_error(module, # X, NULL); \ + return NULL; \ + } \ + return PyLong_FromLong(rtn); \ +} -#define NoArgReturnStringFunctionBody(X) \ -{ \ - PyCursesStatefulInitialised(module); \ - return PyBytes_FromString(X()); } +#define NoArgReturnStringFunctionBody(X) \ +{ \ + PyCursesStatefulInitialised(module); \ + const char *res = X(); \ + if (res == NULL) { \ + curses_set_null_error(module, # X, NULL); \ + return NULL; \ + } \ + return PyBytes_FromString(res); \ +} -#define NoArgTrueFalseFunctionBody(X) \ -{ \ - PyCursesStatefulInitialised(module); \ - return PyBool_FromLong(X()); } +#define NoArgTrueFalseFunctionBody(X) \ +{ \ + PyCursesStatefulInitialised(module); \ + return PyBool_FromLong(X()); \ +} -#define NoArgNoReturnVoidFunctionBody(X) \ -{ \ - PyCursesStatefulInitialised(module); \ - X(); \ - Py_RETURN_NONE; } +#define NoArgNoReturnVoidFunctionBody(X) \ +{ \ + PyCursesStatefulInitialised(module); \ + X(); \ + Py_RETURN_NONE; \ +} /********************************************************************* Global Functions @@ -3617,8 +3708,12 @@ _curses_initscr_impl(PyObject *module) WINDOW *win; if (curses_initscr_called) { - wrefresh(stdscr); cursesmodule_state *state = get_cursesmodule_state(module); + int code = wrefresh(stdscr); + if (code == ERR) { + _curses_set_null_error(state, "wrefresh", "initscr"); + return NULL; + } return PyCursesWindow_New(state, stdscr, NULL, NULL); } @@ -4802,7 +4897,12 @@ _curses_unctrl(PyObject *module, PyObject *ch) if (!PyCurses_ConvertToChtype(NULL, ch, &ch_)) return NULL; - return PyBytes_FromString(unctrl(ch_)); + const char *res = unctrl(ch_); + if (res == NULL) { + curses_set_null_error(module, "unctrl", NULL); + return NULL; + } + return PyBytes_FromString(res); } /*[clinic input] @@ -4971,13 +5071,7 @@ _curses_assume_default_colors_impl(PyObject *module, int fg, int bg) PyCursesStatefulInitialisedColor(module); code = assume_default_colors(fg, bg); - if (code != ERR) { - Py_RETURN_NONE; - } else { - cursesmodule_state *state = get_cursesmodule_state(module); - PyErr_SetString(state->error, "assume_default_colors() returned ERR"); - return NULL; - } + return curses_check_err(module, code, "assume_default_colors", NULL); } #endif /* STRICT_SYSV_CURSES */ diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index a898a7e17cf8d1..49c864318c8aa8 100644 --- a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -733,23 +733,13 @@ PyDoc_STRVAR(_curses_window_getbkgd__doc__, #define _CURSES_WINDOW_GETBKGD_METHODDEF \ {"getbkgd", (PyCFunction)_curses_window_getbkgd, METH_NOARGS, _curses_window_getbkgd__doc__}, -static long +static PyObject * _curses_window_getbkgd_impl(PyCursesWindowObject *self); static PyObject * _curses_window_getbkgd(PyObject *self, PyObject *Py_UNUSED(ignored)) { - PyObject *return_value = NULL; - long _return_value; - - _return_value = _curses_window_getbkgd_impl((PyCursesWindowObject *)self); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong(_return_value); - -exit: - return return_value; + return _curses_window_getbkgd_impl((PyCursesWindowObject *)self); } PyDoc_STRVAR(_curses_window_getch__doc__, @@ -1050,7 +1040,7 @@ PyDoc_STRVAR(_curses_window_inch__doc__, #define _CURSES_WINDOW_INCH_METHODDEF \ {"inch", (PyCFunction)_curses_window_inch, METH_VARARGS, _curses_window_inch__doc__}, -static unsigned long +static PyObject * _curses_window_inch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x); @@ -1061,7 +1051,6 @@ _curses_window_inch(PyObject *self, PyObject *args) int group_right_1 = 0; int y = 0; int x = 0; - unsigned long _return_value; switch (PyTuple_GET_SIZE(args)) { case 0: @@ -1076,11 +1065,7 @@ _curses_window_inch(PyObject *self, PyObject *args) PyErr_SetString(PyExc_TypeError, "_curses.window.inch requires 0 to 2 arguments"); goto exit; } - _return_value = _curses_window_inch_impl((PyCursesWindowObject *)self, group_right_1, y, x); - if ((_return_value == (unsigned long)-1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromUnsignedLong(_return_value); + return_value = _curses_window_inch_impl((PyCursesWindowObject *)self, group_right_1, y, x); exit: return return_value; @@ -4435,4 +4420,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=7753612d7613903c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a083473003179b30 input=a9049054013a1b77]*/ From 176b059fa03b3d6abd58c6711bb24111f2245706 Mon Sep 17 00:00:00 2001 From: Wulian233 <1055917385@qq.com> Date: Tue, 27 May 2025 20:45:54 +0800 Subject: [PATCH 373/989] Update README.rst informations from 3.14 to 3.15 (#134649) Co-authored-by: Wulian233 <1055917385@qq.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- README.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 5bad7ea1c36dda..baea5e0978d8fa 100644 --- a/README.rst +++ b/README.rst @@ -135,8 +135,8 @@ libraries for additional performance gains. What's New ---------- -We have a comprehensive overview of the changes in the `What's New in Python -3.14 `_ document. For a more +We have a comprehensive overview of the changes in the `What's new in Python +3.15 `_ document. For a more detailed change log, read `Misc/NEWS `_, but a full accounting of changes can only be gleaned from the `commit history @@ -149,7 +149,7 @@ entitled "Installing multiple versions". Documentation ------------- -`Documentation for Python 3.14 `_ is online, +`Documentation for Python 3.15 `_ is online, updated daily. It can also be downloaded in many formats for faster access. The documentation @@ -200,15 +200,15 @@ intend to install multiple versions using the same prefix you must decide which version (if any) is your "primary" version. Install that version using ``make install``. Install all other versions using ``make altinstall``. -For example, if you want to install Python 2.7, 3.6, and 3.14 with 3.14 being the -primary version, you would execute ``make install`` in your 3.14 build directory +For example, if you want to install Python 2.7, 3.6, and 3.15 with 3.15 being the +primary version, you would execute ``make install`` in your 3.15 build directory and ``make altinstall`` in the others. Release Schedule ---------------- -See `PEP 745 `__ for Python 3.14 release details. +See `PEP 790 `__ for Python 3.15 release details. Copyright and License Information From 9300a596d37d058e6e58d00a2ad70617c863a3de Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 27 May 2025 15:09:46 +0200 Subject: [PATCH 374/989] gh-134744: Fix fcntl error handling (#134748) Fix also reference leak on buffer overflow. --- Lib/test/test_fcntl.py | 13 ++++++++++++- Lib/test/test_ioctl.py | 13 ++++++++++++- Modules/fcntlmodule.c | 6 ++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_fcntl.py b/Lib/test/test_fcntl.py index e0e6782258fa78..7140a7b4f29188 100644 --- a/Lib/test/test_fcntl.py +++ b/Lib/test/test_fcntl.py @@ -11,7 +11,7 @@ cpython_only, get_pagesize, is_apple, requires_subprocess, verbose ) from test.support.import_helper import import_module -from test.support.os_helper import TESTFN, unlink +from test.support.os_helper import TESTFN, unlink, make_bad_fd # Skip test if no fcntl module. @@ -274,6 +274,17 @@ def test_fcntl_small_buffer(self): def test_fcntl_large_buffer(self): self._check_fcntl_not_mutate_len(2024) + @unittest.skipUnless(hasattr(fcntl, 'F_DUPFD'), 'need fcntl.F_DUPFD') + def test_bad_fd(self): + # gh-134744: Test error handling + fd = make_bad_fd() + with self.assertRaises(OSError): + fcntl.fcntl(fd, fcntl.F_DUPFD, 0) + with self.assertRaises(OSError): + fcntl.fcntl(fd, fcntl.F_DUPFD, b'\0' * 10) + with self.assertRaises(OSError): + fcntl.fcntl(fd, fcntl.F_DUPFD, b'\0' * 2048) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_ioctl.py b/Lib/test/test_ioctl.py index 3c7a58aa2bc7bf..277d2fc99eaec6 100644 --- a/Lib/test/test_ioctl.py +++ b/Lib/test/test_ioctl.py @@ -5,7 +5,7 @@ import threading import unittest from test import support -from test.support import threading_helper +from test.support import os_helper, threading_helper from test.support.import_helper import import_module fcntl = import_module('fcntl') termios = import_module('termios') @@ -201,6 +201,17 @@ def test_ioctl_set_window_size(self): new_winsz = struct.unpack("HHHH", result) self.assertEqual(new_winsz[:2], (20, 40)) + @unittest.skipUnless(hasattr(fcntl, 'FICLONE'), 'need fcntl.FICLONE') + def test_bad_fd(self): + # gh-134744: Test error handling + fd = os_helper.make_bad_fd() + with self.assertRaises(OSError): + fcntl.ioctl(fd, fcntl.FICLONE, fd) + with self.assertRaises(OSError): + fcntl.ioctl(fd, fcntl.FICLONE, b'\0' * 10) + with self.assertRaises(OSError): + fcntl.ioctl(fd, fcntl.FICLONE, b'\0' * 2048) + if __name__ == "__main__": unittest.main() diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index 8b6379f1e6501b..90363b9dca3316 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -128,7 +128,7 @@ fcntl_fcntl_impl(PyObject *module, int fd, int code, PyObject *arg) Py_END_ALLOW_THREADS } while (ret == -1 && errno == EINTR && !(async_err = PyErr_CheckSignals())); if (ret < 0) { - if (async_err) { + if (!async_err) { PyErr_SetFromErrno(PyExc_OSError); } Py_DECREF(result); @@ -136,6 +136,7 @@ fcntl_fcntl_impl(PyObject *module, int fd, int code, PyObject *arg) } if (ptr[len] != '\0') { PyErr_SetString(PyExc_SystemError, "buffer overflow"); + Py_DECREF(result); return NULL; } return result; @@ -310,7 +311,7 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, Py_END_ALLOW_THREADS } while (ret == -1 && errno == EINTR && !(async_err = PyErr_CheckSignals())); if (ret < 0) { - if (async_err) { + if (!async_err) { PyErr_SetFromErrno(PyExc_OSError); } Py_DECREF(result); @@ -318,6 +319,7 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, } if (ptr[len] != '\0') { PyErr_SetString(PyExc_SystemError, "buffer overflow"); + Py_DECREF(result); return NULL; } return result; From f6f4e8a6622d556641799b02aed7ac018d878cdc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 27 May 2025 15:59:45 +0100 Subject: [PATCH 375/989] GH-132554: "Virtual" iterators (GH-132555) * FOR_ITER now pushes either the iterator and NULL or leaves the iterable and pushes tagged zero * NEXT_ITER uses the tagged int as the index into the sequence or, if TOS is NULL, iterates as before. --- Include/internal/pycore_ceval.h | 2 + Include/internal/pycore_code.h | 2 +- Include/internal/pycore_compile.h | 1 + Include/internal/pycore_magic_number.h | 4 +- Include/internal/pycore_opcode_metadata.h | 40 +-- Include/internal/pycore_stackref.h | 74 +++-- Include/internal/pycore_uop_ids.h | 1 + Include/internal/pycore_uop_metadata.h | 6 +- Lib/test/test_dis.py | 207 ++++++------ Lib/test/test_list.py | 15 + ...-04-30-14-13-01.gh-issue-132554.GqQaUp.rst | 4 + Objects/frameobject.c | 4 + Programs/test_frozenmain.h | 2 +- Python/bytecodes.c | 300 +++++++++--------- Python/ceval.c | 20 ++ Python/codegen.c | 41 ++- Python/executor_cases.c.h | 189 +++++------ Python/flowgraph.c | 17 +- Python/generated_cases.c.h | 289 +++++++++-------- Python/optimizer_bytecodes.c | 4 +- Python/optimizer_cases.c.h | 11 + Python/specialize.c | 78 ++--- Python/stackrefs.c | 8 + Tools/cases_generator/analyzer.py | 3 + Tools/cases_generator/generators_common.py | 1 + 25 files changed, 709 insertions(+), 614 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-30-14-13-01.gh-issue-132554.GqQaUp.rst diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 3d8247df31ce32..092feeb40b04a3 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -353,6 +353,8 @@ PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyS extern int _PyRunRemoteDebugger(PyThreadState *tstate); #endif +_PyStackRef _PyForIter_NextWithIndex(PyObject *seq, _PyStackRef index); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 439989c60f6f24..8e1415f27b63f3 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -313,7 +313,7 @@ extern void _Py_Specialize_CompareOp(_PyStackRef lhs, _PyStackRef rhs, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_UnpackSequence(_PyStackRef seq, _Py_CODEUNIT *instr, int oparg); -extern void _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg); +extern void _Py_Specialize_ForIter(_PyStackRef iter, _PyStackRef null_or_index, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_Send(_PyStackRef receiver, _Py_CODEUNIT *instr); extern void _Py_Specialize_ToBool(_PyStackRef value, _Py_CODEUNIT *instr); extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr); diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index aecc50be1e6c34..c18e04bf67a5df 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -95,6 +95,7 @@ typedef enum { enum _PyCompile_FBlockType { COMPILE_FBLOCK_WHILE_LOOP, COMPILE_FBLOCK_FOR_LOOP, + COMPILE_FBLOCK_ASYNC_FOR_LOOP, COMPILE_FBLOCK_TRY_EXCEPT, COMPILE_FBLOCK_FINALLY_TRY, COMPILE_FBLOCK_FINALLY_END, diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 3fd56c346b9446..cd1fc873623ed1 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -279,6 +279,8 @@ Known values: Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST) Python 3.15a0 3650 (Initial version) Python 3.15a1 3651 (Simplify LOAD_CONST) + Python 3.15a1 3652 (Virtual iterators) + Python 3.16 will start with 3700 @@ -291,7 +293,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3651 +#define PYC_MAGIC_NUMBER 3652 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 39d6a912a5437f..00e918cb8f0cd1 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -205,15 +205,15 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case FORMAT_WITH_SPEC: return 2; case FOR_ITER: - return 1; + return 2; case FOR_ITER_GEN: - return 1; + return 2; case FOR_ITER_LIST: - return 1; + return 2; case FOR_ITER_RANGE: - return 1; + return 2; case FOR_ITER_TUPLE: - return 1; + return 2; case GET_AITER: return 1; case GET_ANEXT: @@ -239,11 +239,11 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case INSTRUMENTED_END_ASYNC_FOR: return 2; case INSTRUMENTED_END_FOR: - return 2; + return 3; case INSTRUMENTED_END_SEND: return 2; case INSTRUMENTED_FOR_ITER: - return 1; + return 2; case INSTRUMENTED_INSTRUCTION: return 0; case INSTRUMENTED_JUMP_BACKWARD: @@ -257,7 +257,7 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case INSTRUMENTED_NOT_TAKEN: return 0; case INSTRUMENTED_POP_ITER: - return 1; + return 2; case INSTRUMENTED_POP_JUMP_IF_FALSE: return 1; case INSTRUMENTED_POP_JUMP_IF_NONE: @@ -395,7 +395,7 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case POP_EXCEPT: return 1; case POP_ITER: - return 1; + return 2; case POP_JUMP_IF_FALSE: return 1; case POP_JUMP_IF_NONE: @@ -688,15 +688,15 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case FORMAT_WITH_SPEC: return 1; case FOR_ITER: - return 2; + return 3; case FOR_ITER_GEN: - return 1; - case FOR_ITER_LIST: return 2; + case FOR_ITER_LIST: + return 3; case FOR_ITER_RANGE: - return 2; + return 3; case FOR_ITER_TUPLE: - return 2; + return 3; case GET_AITER: return 1; case GET_ANEXT: @@ -704,7 +704,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case GET_AWAITABLE: return 1; case GET_ITER: - return 1; + return 2; case GET_LEN: return 2; case GET_YIELD_FROM_ITER: @@ -722,11 +722,11 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case INSTRUMENTED_END_ASYNC_FOR: return 0; case INSTRUMENTED_END_FOR: - return 1; + return 2; case INSTRUMENTED_END_SEND: return 1; case INSTRUMENTED_FOR_ITER: - return 2; + return 3; case INSTRUMENTED_INSTRUCTION: return 0; case INSTRUMENTED_JUMP_BACKWARD: @@ -1157,7 +1157,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [GET_ANEXT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [GET_AWAITABLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1242,7 +1242,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [NOT_TAKEN] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [POP_ITER] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, + [POP_ITER] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, @@ -1453,7 +1453,7 @@ _PyOpcode_macro_expansion[256] = { [NOP] = { .nuops = 1, .uops = { { _NOP, OPARG_SIMPLE, 0 } } }, [NOT_TAKEN] = { .nuops = 1, .uops = { { _NOP, OPARG_SIMPLE, 0 } } }, [POP_EXCEPT] = { .nuops = 1, .uops = { { _POP_EXCEPT, OPARG_SIMPLE, 0 } } }, - [POP_ITER] = { .nuops = 1, .uops = { { _POP_TOP, OPARG_SIMPLE, 0 } } }, + [POP_ITER] = { .nuops = 1, .uops = { { _POP_ITER, OPARG_SIMPLE, 0 } } }, [POP_JUMP_IF_FALSE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_FALSE, OPARG_REPLACED, 1 } } }, [POP_JUMP_IF_NONE] = { .nuops = 2, .uops = { { _IS_NONE, OPARG_SIMPLE, 1 }, { _POP_JUMP_IF_TRUE, OPARG_REPLACED, 1 } } }, [POP_JUMP_IF_NOT_NONE] = { .nuops = 2, .uops = { { _IS_NONE, OPARG_SIMPLE, 1 }, { _POP_JUMP_IF_FALSE, OPARG_REPLACED, 1 } } }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 40ec00c8119108..f2ecc30b053568 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -232,6 +232,9 @@ extern intptr_t PyStackRef_UntagInt(_PyStackRef ref); extern _PyStackRef PyStackRef_TagInt(intptr_t i); +/* Increments a tagged int, but does not check for overflow */ +extern _PyStackRef PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref); + extern bool PyStackRef_IsNullOrInt(_PyStackRef ref); @@ -239,11 +242,12 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #define Py_INT_TAG 3 #define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 static inline bool PyStackRef_IsTaggedInt(_PyStackRef i) { - return (i.bits & Py_INT_TAG) == Py_INT_TAG; + return (i.bits & Py_TAG_BITS) == Py_INT_TAG; } static inline _PyStackRef @@ -262,12 +266,21 @@ PyStackRef_UntagInt(_PyStackRef i) } +static inline _PyStackRef +PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) +{ + assert((ref.bits & Py_TAG_BITS) == Py_INT_TAG); // Is tagged int + assert((ref.bits & (~Py_TAG_BITS)) != (INT_MAX & (~Py_TAG_BITS))); // Isn't about to overflow + return (_PyStackRef){ .bits = ref.bits + 4 }; +} + +#define PyStackRef_IsDeferredOrTaggedInt(ref) (((ref).bits & Py_TAG_REFCNT) != 0) + #ifdef Py_GIL_DISABLED #define Py_TAG_DEFERRED Py_TAG_REFCNT #define Py_TAG_PTR ((uintptr_t)0) -#define Py_TAG_BITS ((uintptr_t)1) static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; @@ -379,7 +392,7 @@ PyStackRef_FromPyObjectBorrow(PyObject *obj) do { \ _PyStackRef _close_tmp = (REF); \ assert(!PyStackRef_IsNull(_close_tmp)); \ - if (!PyStackRef_IsDeferred(_close_tmp)) { \ + if (!PyStackRef_IsDeferredOrTaggedInt(_close_tmp)) { \ Py_DECREF(PyStackRef_AsPyObjectBorrow(_close_tmp)); \ } \ } while (0) @@ -395,7 +408,7 @@ static inline _PyStackRef PyStackRef_DUP(_PyStackRef stackref) { assert(!PyStackRef_IsNull(stackref)); - if (PyStackRef_IsDeferred(stackref)) { + if (PyStackRef_IsDeferredOrTaggedInt(stackref)) { return stackref; } Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref)); @@ -442,7 +455,6 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) /* References to immortal objects always have their tag bit set to Py_TAG_REFCNT * as they can (must) have their reclamation deferred */ -#define Py_TAG_BITS 3 #if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif @@ -678,7 +690,13 @@ PyStackRef_XCLOSE(_PyStackRef ref) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) -#define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) +static inline PyTypeObject * +PyStackRef_TYPE(_PyStackRef stackref) { + if (PyStackRef_IsTaggedInt(stackref)) { + return &PyLong_Type; + } + return Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)); +} // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. @@ -686,42 +704,30 @@ PyStackRef_XCLOSE(_PyStackRef ref) // StackRef type checks -static inline bool -PyStackRef_GenCheck(_PyStackRef stackref) -{ - return PyGen_Check(PyStackRef_AsPyObjectBorrow(stackref)); -} +#define STACKREF_CHECK_FUNC(T) \ + static inline bool \ + PyStackRef_ ## T ## Check(_PyStackRef stackref) { \ + if (PyStackRef_IsTaggedInt(stackref)) { \ + return false; \ + } \ + return Py ## T ## _Check(PyStackRef_AsPyObjectBorrow(stackref)); \ + } -static inline bool -PyStackRef_BoolCheck(_PyStackRef stackref) -{ - return PyBool_Check(PyStackRef_AsPyObjectBorrow(stackref)); -} +STACKREF_CHECK_FUNC(Gen) +STACKREF_CHECK_FUNC(Bool) +STACKREF_CHECK_FUNC(ExceptionInstance) +STACKREF_CHECK_FUNC(Code) +STACKREF_CHECK_FUNC(Function) static inline bool PyStackRef_LongCheck(_PyStackRef stackref) { + if (PyStackRef_IsTaggedInt(stackref)) { + return true; + } return PyLong_Check(PyStackRef_AsPyObjectBorrow(stackref)); } -static inline bool -PyStackRef_ExceptionInstanceCheck(_PyStackRef stackref) -{ - return PyExceptionInstance_Check(PyStackRef_AsPyObjectBorrow(stackref)); -} - -static inline bool -PyStackRef_CodeCheck(_PyStackRef stackref) -{ - return PyCode_Check(PyStackRef_AsPyObjectBorrow(stackref)); -} - -static inline bool -PyStackRef_FunctionCheck(_PyStackRef stackref) -{ - return PyFunction_Check(PyStackRef_AsPyObjectBorrow(stackref)); -} - static inline void _PyThreadState_PushCStackRef(PyThreadState *tstate, _PyCStackRef *ref) { diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d08799487fda42..2b845527cf2ed5 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -272,6 +272,7 @@ extern "C" { #define _POP_CALL_TWO 489 #define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 #define _POP_EXCEPT POP_EXCEPT +#define _POP_ITER POP_ITER #define _POP_JUMP_IF_FALSE 491 #define _POP_JUMP_IF_TRUE 492 #define _POP_TOP POP_TOP diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5ebe124983b862..b08909e72c4f43 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -66,6 +66,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, + [_POP_ITER] = HAS_ESCAPES_FLAG, [_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNARY_NOT] = HAS_PURE_FLAG, @@ -205,7 +206,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, - [_ITER_NEXT_LIST_TIER_TWO] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_ITER_NEXT_LIST_TIER_TWO] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, [_ITER_NEXT_TUPLE] = 0, @@ -569,6 +570,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_CALL_TWO] = "_POP_CALL_TWO", [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", + [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", @@ -730,6 +732,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _END_FOR: return 1; + case _POP_ITER: + return 2; case _END_SEND: return 2; case _UNARY_NEGATIVE: diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 547f6de5f5aada..ec930a728aa5b3 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -851,7 +851,7 @@ def foo(x): %4d RETURN_GENERATOR POP_TOP L1: RESUME 0 - LOAD_FAST_BORROW 0 (.0) + LOAD_FAST 0 (.0) GET_ITER L2: FOR_ITER 14 (to L3) STORE_FAST 1 (z) @@ -1821,7 +1821,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_SMALL_INT', arg=10, argval=10, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=3), make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=14, start_offset=14, starts_line=False, line_number=3, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='GET_ITER', arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3), - make_inst(opname='FOR_ITER', arg=32, argval=92, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='FOR_ITER', arg=33, argval=94, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3), make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4), @@ -1840,110 +1840,111 @@ def _prepare_test_cases(): make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=82, start_offset=82, starts_line=False, line_number=7), make_inst(opname='JUMP_BACKWARD', arg=32, argval=24, argrepr='to L1', offset=84, start_offset=84, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=88, start_offset=88, starts_line=True, line_number=8, label=3), - make_inst(opname='JUMP_FORWARD', arg=13, argval=118, argrepr='to L5', offset=90, start_offset=90, starts_line=False, line_number=8), - make_inst(opname='END_FOR', arg=None, argval=None, argrepr='', offset=92, start_offset=92, starts_line=True, line_number=3, label=4), - make_inst(opname='POP_ITER', arg=None, argval=None, argrepr='', offset=94, start_offset=94, starts_line=False, line_number=3), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=96, start_offset=96, starts_line=True, line_number=10, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=1, argval='I can haz else clause?', argrepr="'I can haz else clause?'", offset=106, start_offset=106, starts_line=False, line_number=10), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=108, start_offset=108, starts_line=False, line_number=10, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=116, start_offset=116, starts_line=False, line_number=10), - make_inst(opname='LOAD_FAST_CHECK', arg=0, argval='i', argrepr='i', offset=118, start_offset=118, starts_line=True, line_number=11, label=5), - make_inst(opname='TO_BOOL', arg=None, argval=None, argrepr='', offset=120, start_offset=120, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_JUMP_IF_FALSE', arg=40, argval=212, argrepr='to L8', offset=128, start_offset=128, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=132, start_offset=132, starts_line=False, line_number=11), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=134, start_offset=134, starts_line=True, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=146, start_offset=146, starts_line=False, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=154, start_offset=154, starts_line=False, line_number=12), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13), - make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=158, start_offset=158, starts_line=False, line_number=13), - make_inst(opname='BINARY_OP', arg=23, argval=23, argrepr='-=', offset=160, start_offset=160, starts_line=False, line_number=13, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), - make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=172, start_offset=172, starts_line=False, line_number=13), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14), - make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=176, start_offset=176, starts_line=False, line_number=14), - make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=178, start_offset=178, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=192, argrepr='to L6', offset=182, start_offset=182, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=186, start_offset=186, starts_line=False, line_number=14), - make_inst(opname='JUMP_BACKWARD', arg=37, argval=118, argrepr='to L5', offset=188, start_offset=188, starts_line=True, line_number=15, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6), - make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=194, start_offset=194, starts_line=False, line_number=16), - make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=196, start_offset=196, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=210, argrepr='to L7', offset=200, start_offset=200, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=204, start_offset=204, starts_line=False, line_number=16), - make_inst(opname='JUMP_BACKWARD', arg=46, argval=118, argrepr='to L5', offset=206, start_offset=206, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='JUMP_FORWARD', arg=11, argval=234, argrepr='to L9', offset=210, start_offset=210, starts_line=True, line_number=17, label=7), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=212, start_offset=212, starts_line=True, line_number=19, label=8, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=2, argval='Who let lolcatz into this test suite?', argrepr="'Who let lolcatz into this test suite?'", offset=222, start_offset=222, starts_line=False, line_number=19), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=224, start_offset=224, starts_line=False, line_number=19, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=232, start_offset=232, starts_line=False, line_number=19), - make_inst(opname='NOP', arg=None, argval=None, argrepr='', offset=234, start_offset=234, starts_line=True, line_number=20, label=9), - make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=236, start_offset=236, starts_line=True, line_number=21), - make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21), - make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=240, start_offset=240, starts_line=False, line_number=21, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=252, start_offset=252, starts_line=False, line_number=21), - make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25), - make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=256, start_offset=256, starts_line=False, line_number=25), - make_inst(opname='LOAD_SPECIAL', arg=1, argval=1, argrepr='__exit__', offset=258, start_offset=258, starts_line=False, line_number=25), - make_inst(opname='SWAP', arg=2, argval=2, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=25), - make_inst(opname='SWAP', arg=3, argval=3, argrepr='', offset=262, start_offset=262, starts_line=False, line_number=25), - make_inst(opname='LOAD_SPECIAL', arg=0, argval=0, argrepr='__enter__', offset=264, start_offset=264, starts_line=False, line_number=25), - make_inst(opname='CALL', arg=0, argval=0, argrepr='', offset=266, start_offset=266, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='STORE_FAST', arg=1, argval='dodgy', argrepr='dodgy', offset=274, start_offset=274, starts_line=False, line_number=25), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=276, start_offset=276, starts_line=True, line_number=26, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=3, argval='Never reach this', argrepr="'Never reach this'", offset=286, start_offset=286, starts_line=False, line_number=26), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=288, start_offset=288, starts_line=False, line_number=26, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=296, start_offset=296, starts_line=False, line_number=26), - make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=298, start_offset=298, starts_line=True, line_number=25), - make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=300, start_offset=300, starts_line=False, line_number=25), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=90, start_offset=90, starts_line=False, line_number=8), + make_inst(opname='JUMP_FORWARD', arg=13, argval=120, argrepr='to L5', offset=92, start_offset=92, starts_line=False, line_number=8), + make_inst(opname='END_FOR', arg=None, argval=None, argrepr='', offset=94, start_offset=94, starts_line=True, line_number=3, label=4), + make_inst(opname='POP_ITER', arg=None, argval=None, argrepr='', offset=96, start_offset=96, starts_line=False, line_number=3), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=98, start_offset=98, starts_line=True, line_number=10, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=1, argval='I can haz else clause?', argrepr="'I can haz else clause?'", offset=108, start_offset=108, starts_line=False, line_number=10), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=110, start_offset=110, starts_line=False, line_number=10, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=118, start_offset=118, starts_line=False, line_number=10), + make_inst(opname='LOAD_FAST_CHECK', arg=0, argval='i', argrepr='i', offset=120, start_offset=120, starts_line=True, line_number=11, label=5), + make_inst(opname='TO_BOOL', arg=None, argval=None, argrepr='', offset=122, start_offset=122, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_JUMP_IF_FALSE', arg=40, argval=214, argrepr='to L8', offset=130, start_offset=130, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=134, start_offset=134, starts_line=False, line_number=11), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=136, start_offset=136, starts_line=True, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=146, start_offset=146, starts_line=False, line_number=12), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=148, start_offset=148, starts_line=False, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=156, start_offset=156, starts_line=False, line_number=12), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=158, start_offset=158, starts_line=True, line_number=13), + make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=160, start_offset=160, starts_line=False, line_number=13), + make_inst(opname='BINARY_OP', arg=23, argval=23, argrepr='-=', offset=162, start_offset=162, starts_line=False, line_number=13, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), + make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=False, line_number=13), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=176, start_offset=176, starts_line=True, line_number=14), + make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=178, start_offset=178, starts_line=False, line_number=14), + make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=180, start_offset=180, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=194, argrepr='to L6', offset=184, start_offset=184, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=188, start_offset=188, starts_line=False, line_number=14), + make_inst(opname='JUMP_BACKWARD', arg=37, argval=120, argrepr='to L5', offset=190, start_offset=190, starts_line=True, line_number=15, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=194, start_offset=194, starts_line=True, line_number=16, label=6), + make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=196, start_offset=196, starts_line=False, line_number=16), + make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=198, start_offset=198, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=212, argrepr='to L7', offset=202, start_offset=202, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=206, start_offset=206, starts_line=False, line_number=16), + make_inst(opname='JUMP_BACKWARD', arg=46, argval=120, argrepr='to L5', offset=208, start_offset=208, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='JUMP_FORWARD', arg=11, argval=236, argrepr='to L9', offset=212, start_offset=212, starts_line=True, line_number=17, label=7), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=214, start_offset=214, starts_line=True, line_number=19, label=8, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=2, argval='Who let lolcatz into this test suite?', argrepr="'Who let lolcatz into this test suite?'", offset=224, start_offset=224, starts_line=False, line_number=19), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=226, start_offset=226, starts_line=False, line_number=19, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=234, start_offset=234, starts_line=False, line_number=19), + make_inst(opname='NOP', arg=None, argval=None, argrepr='', offset=236, start_offset=236, starts_line=True, line_number=20, label=9), + make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=238, start_offset=238, starts_line=True, line_number=21), + make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=240, start_offset=240, starts_line=False, line_number=21), + make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=242, start_offset=242, starts_line=False, line_number=21, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=254, start_offset=254, starts_line=False, line_number=21), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=256, start_offset=256, starts_line=True, line_number=25), + make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=258, start_offset=258, starts_line=False, line_number=25), + make_inst(opname='LOAD_SPECIAL', arg=1, argval=1, argrepr='__exit__', offset=260, start_offset=260, starts_line=False, line_number=25), + make_inst(opname='SWAP', arg=2, argval=2, argrepr='', offset=262, start_offset=262, starts_line=False, line_number=25), + make_inst(opname='SWAP', arg=3, argval=3, argrepr='', offset=264, start_offset=264, starts_line=False, line_number=25), + make_inst(opname='LOAD_SPECIAL', arg=0, argval=0, argrepr='__enter__', offset=266, start_offset=266, starts_line=False, line_number=25), + make_inst(opname='CALL', arg=0, argval=0, argrepr='', offset=268, start_offset=268, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='STORE_FAST', arg=1, argval='dodgy', argrepr='dodgy', offset=276, start_offset=276, starts_line=False, line_number=25), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=278, start_offset=278, starts_line=True, line_number=26, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=3, argval='Never reach this', argrepr="'Never reach this'", offset=288, start_offset=288, starts_line=False, line_number=26), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=290, start_offset=290, starts_line=False, line_number=26, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=298, start_offset=298, starts_line=False, line_number=26), + make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=300, start_offset=300, starts_line=True, line_number=25), make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=302, start_offset=302, starts_line=False, line_number=25), - make_inst(opname='CALL', arg=3, argval=3, argrepr='', offset=304, start_offset=304, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=312, start_offset=312, starts_line=False, line_number=25), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=314, start_offset=314, starts_line=True, line_number=28, label=10, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=6, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=324, start_offset=324, starts_line=False, line_number=28), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=326, start_offset=326, starts_line=False, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=334, start_offset=334, starts_line=False, line_number=28), - make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=336, start_offset=336, starts_line=False, line_number=28), - make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=338, start_offset=338, starts_line=False, line_number=28), - make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=340, start_offset=340, starts_line=True, line_number=25), - make_inst(opname='WITH_EXCEPT_START', arg=None, argval=None, argrepr='', offset=342, start_offset=342, starts_line=False, line_number=25), - make_inst(opname='TO_BOOL', arg=None, argval=None, argrepr='', offset=344, start_offset=344, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_JUMP_IF_TRUE', arg=2, argval=360, argrepr='to L11', offset=352, start_offset=352, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=356, start_offset=356, starts_line=False, line_number=25), - make_inst(opname='RERAISE', arg=2, argval=2, argrepr='', offset=358, start_offset=358, starts_line=False, line_number=25), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=360, start_offset=360, starts_line=False, line_number=25, label=11), - make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=362, start_offset=362, starts_line=False, line_number=25), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=364, start_offset=364, starts_line=False, line_number=25), + make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=304, start_offset=304, starts_line=False, line_number=25), + make_inst(opname='CALL', arg=3, argval=3, argrepr='', offset=306, start_offset=306, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=314, start_offset=314, starts_line=False, line_number=25), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=316, start_offset=316, starts_line=True, line_number=28, label=10, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=6, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=326, start_offset=326, starts_line=False, line_number=28), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=328, start_offset=328, starts_line=False, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=336, start_offset=336, starts_line=False, line_number=28), + make_inst(opname='LOAD_CONST', arg=4, argval=None, argrepr='None', offset=338, start_offset=338, starts_line=False, line_number=28), + make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=340, start_offset=340, starts_line=False, line_number=28), + make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=342, start_offset=342, starts_line=True, line_number=25), + make_inst(opname='WITH_EXCEPT_START', arg=None, argval=None, argrepr='', offset=344, start_offset=344, starts_line=False, line_number=25), + make_inst(opname='TO_BOOL', arg=None, argval=None, argrepr='', offset=346, start_offset=346, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_JUMP_IF_TRUE', arg=2, argval=362, argrepr='to L11', offset=354, start_offset=354, starts_line=False, line_number=25, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=358, start_offset=358, starts_line=False, line_number=25), + make_inst(opname='RERAISE', arg=2, argval=2, argrepr='', offset=360, start_offset=360, starts_line=False, line_number=25), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=362, start_offset=362, starts_line=False, line_number=25, label=11), + make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=364, start_offset=364, starts_line=False, line_number=25), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=366, start_offset=366, starts_line=False, line_number=25), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=368, start_offset=368, starts_line=False, line_number=25), - make_inst(opname='JUMP_BACKWARD_NO_INTERRUPT', arg=29, argval=314, argrepr='to L10', offset=370, start_offset=370, starts_line=False, line_number=25), - make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=372, start_offset=372, starts_line=True, line_number=None), - make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=374, start_offset=374, starts_line=False, line_number=None), - make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=376, start_offset=376, starts_line=False, line_number=None), - make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=378, start_offset=378, starts_line=False, line_number=None), - make_inst(opname='LOAD_GLOBAL', arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=380, start_offset=380, starts_line=True, line_number=22, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='CHECK_EXC_MATCH', arg=None, argval=None, argrepr='', offset=390, start_offset=390, starts_line=False, line_number=22), - make_inst(opname='POP_JUMP_IF_FALSE', arg=15, argval=426, argrepr='to L12', offset=392, start_offset=392, starts_line=False, line_number=22, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=396, start_offset=396, starts_line=False, line_number=22), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=398, start_offset=398, starts_line=False, line_number=22), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=400, start_offset=400, starts_line=True, line_number=23, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=5, argval='Here we go, here we go, here we go...', argrepr="'Here we go, here we go, here we go...'", offset=410, start_offset=410, starts_line=False, line_number=23), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=412, start_offset=412, starts_line=False, line_number=23, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=420, start_offset=420, starts_line=False, line_number=23), - make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=422, start_offset=422, starts_line=False, line_number=23), - make_inst(opname='JUMP_BACKWARD_NO_INTERRUPT', arg=56, argval=314, argrepr='to L10', offset=424, start_offset=424, starts_line=False, line_number=23), - make_inst(opname='RERAISE', arg=0, argval=0, argrepr='', offset=426, start_offset=426, starts_line=True, line_number=22, label=12), - make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=428, start_offset=428, starts_line=True, line_number=None), - make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=430, start_offset=430, starts_line=False, line_number=None), - make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=432, start_offset=432, starts_line=False, line_number=None), - make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=434, start_offset=434, starts_line=False, line_number=None), - make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=436, start_offset=436, starts_line=True, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_CONST', arg=6, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=446, start_offset=446, starts_line=False, line_number=28), - make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=448, start_offset=448, starts_line=False, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=456, start_offset=456, starts_line=False, line_number=28), - make_inst(opname='RERAISE', arg=0, argval=0, argrepr='', offset=458, start_offset=458, starts_line=False, line_number=28), - make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=460, start_offset=460, starts_line=True, line_number=None), - make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=462, start_offset=462, starts_line=False, line_number=None), - make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=464, start_offset=464, starts_line=False, line_number=None), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=370, start_offset=370, starts_line=False, line_number=25), + make_inst(opname='JUMP_BACKWARD_NO_INTERRUPT', arg=29, argval=316, argrepr='to L10', offset=372, start_offset=372, starts_line=False, line_number=25), + make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=374, start_offset=374, starts_line=True, line_number=None), + make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=376, start_offset=376, starts_line=False, line_number=None), + make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=378, start_offset=378, starts_line=False, line_number=None), + make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=380, start_offset=380, starts_line=False, line_number=None), + make_inst(opname='LOAD_GLOBAL', arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=382, start_offset=382, starts_line=True, line_number=22, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='CHECK_EXC_MATCH', arg=None, argval=None, argrepr='', offset=392, start_offset=392, starts_line=False, line_number=22), + make_inst(opname='POP_JUMP_IF_FALSE', arg=15, argval=428, argrepr='to L12', offset=394, start_offset=394, starts_line=False, line_number=22, cache_info=[('counter', 1, b'\x00\x00')]), + make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=398, start_offset=398, starts_line=False, line_number=22), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=400, start_offset=400, starts_line=False, line_number=22), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=402, start_offset=402, starts_line=True, line_number=23, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=5, argval='Here we go, here we go, here we go...', argrepr="'Here we go, here we go, here we go...'", offset=412, start_offset=412, starts_line=False, line_number=23), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=414, start_offset=414, starts_line=False, line_number=23, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=422, start_offset=422, starts_line=False, line_number=23), + make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=424, start_offset=424, starts_line=False, line_number=23), + make_inst(opname='JUMP_BACKWARD_NO_INTERRUPT', arg=56, argval=316, argrepr='to L10', offset=426, start_offset=426, starts_line=False, line_number=23), + make_inst(opname='RERAISE', arg=0, argval=0, argrepr='', offset=428, start_offset=428, starts_line=True, line_number=22, label=12), + make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=430, start_offset=430, starts_line=True, line_number=None), + make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=432, start_offset=432, starts_line=False, line_number=None), + make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=434, start_offset=434, starts_line=False, line_number=None), + make_inst(opname='PUSH_EXC_INFO', arg=None, argval=None, argrepr='', offset=436, start_offset=436, starts_line=False, line_number=None), + make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=438, start_offset=438, starts_line=True, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + make_inst(opname='LOAD_CONST', arg=6, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=448, start_offset=448, starts_line=False, line_number=28), + make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=450, start_offset=450, starts_line=False, line_number=28, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=458, start_offset=458, starts_line=False, line_number=28), + make_inst(opname='RERAISE', arg=0, argval=0, argrepr='', offset=460, start_offset=460, starts_line=False, line_number=28), + make_inst(opname='COPY', arg=3, argval=3, argrepr='', offset=462, start_offset=462, starts_line=True, line_number=None), + make_inst(opname='POP_EXCEPT', arg=None, argval=None, argrepr='', offset=464, start_offset=464, starts_line=False, line_number=None), + make_inst(opname='RERAISE', arg=1, argval=1, argrepr='', offset=466, start_offset=466, starts_line=False, line_number=None), ] # One last piece of inspect fodder to check the default line number handling diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index 6894fba2ad1b00..223f34fb696a6e 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -365,5 +365,20 @@ def lappend(l, x, y): rc, _, _ = assert_python_ok("-c", code) self.assertEqual(rc, 0) + def test_list_overwrite_local(self): + """Test that overwriting the last reference to the + iterable doesn't prematurely free the iterable""" + + def foo(x): + self.assertEqual(sys.getrefcount(x), 1) + r = 0 + for i in x: + r += i + x = None + return r + + self.assertEqual(foo(list(range(10))), 45) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-30-14-13-01.gh-issue-132554.GqQaUp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-30-14-13-01.gh-issue-132554.GqQaUp.rst new file mode 100644 index 00000000000000..bfe2d633309191 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-30-14-13-01.gh-issue-132554.GqQaUp.rst @@ -0,0 +1,4 @@ +Change iteration to use "virtual iterators" for sequences. Instead of +creating an iterator, a tagged integer representing the next index is pushed +to the stack above the iterable. For non-sequence iterators, ``NULL`` is +pushed. diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 76b52efccf804f..601fc69c4b1f60 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1386,6 +1386,10 @@ mark_stacks(PyCodeObject *code_obj, int len) stacks[j] = next_stack; break; case GET_ITER: + next_stack = push_value(pop_value(next_stack), Iterator); + next_stack = push_value(next_stack, Iterator); + stacks[next_i] = next_stack; + break; case GET_AITER: next_stack = push_value(pop_value(next_stack), Iterator); stacks[next_i] = next_stack; diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index b7d23f57018525..dbeedb7ffe0ce6 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,6 +1,6 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { - 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, + 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, 0,0,0,0,0,243,184,0,0,0,128,0,94,0,82,1, 73,0,116,0,94,0,82,1,73,1,116,1,93,2,33,0, 82,2,52,1,0,0,0,0,0,0,31,0,93,2,33,0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index be22c7446f5402..a1f8d3605283e3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -341,7 +341,7 @@ dummy_func( } pure inst(POP_TOP, (value --)) { - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); } tier2 op(_POP_TWO, (nos, tos --)) { @@ -362,9 +362,14 @@ dummy_func( PyStackRef_CLOSE(value); } - macro(POP_ITER) = POP_TOP; - no_save_ip tier1 inst(INSTRUMENTED_END_FOR, (receiver, value -- receiver)) { + inst(POP_ITER, (iter, index_or_null -- )) { + (void)index_or_null; + DEAD(index_or_null); + PyStackRef_CLOSE(iter); + } + + no_save_ip tier1 inst(INSTRUMENTED_END_FOR, (receiver, index_or_null, value -- receiver, index_or_null)) { /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ if (PyStackRef_GenCheck(receiver)) { @@ -376,7 +381,9 @@ dummy_func( PyStackRef_CLOSE(value); } - tier1 inst(INSTRUMENTED_POP_ITER, (iter -- )) { + tier1 inst(INSTRUMENTED_POP_ITER, (iter, index_or_null -- )) { + (void)index_or_null; + DEAD(index_or_null); INSTRUMENTED_JUMP(prev_instr, this_instr+1, PY_MONITORING_EVENT_BRANCH_RIGHT); PyStackRef_CLOSE(iter); } @@ -3041,15 +3048,24 @@ dummy_func( values_or_none = PyStackRef_FromPyObjectSteal(values_or_none_o); } - inst(GET_ITER, (iterable -- iter)) { + inst(GET_ITER, (iterable -- iter, index_or_null)) { #ifdef Py_STATS _Py_GatherStats_GetIter(iterable); #endif /* before: [obj]; after [getiter(obj)] */ - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - PyStackRef_CLOSE(iterable); - ERROR_IF(iter_o == NULL); - iter = PyStackRef_FromPyObjectSteal(iter_o); + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + DEAD(iterable); + index_or_null = PyStackRef_TagInt(0); + } + else { + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + PyStackRef_CLOSE(iterable); + ERROR_IF(iter_o == NULL); + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + } } inst(GET_YIELD_FROM_ITER, (iterable -- iter)) { @@ -3096,11 +3112,11 @@ dummy_func( FOR_ITER_GEN, }; - specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) { + specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter, null_or_index -- iter, null_or_index)) { #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg); DISPATCH_SAME_OPARG(); } OPCODE_DEFERRED_INC(FOR_ITER); @@ -3108,33 +3124,44 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION_FT */ } - replaced op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter, null_or_index -- iter, null_or_index, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); + if (PyStackRef_IsTaggedInt(null_or_index)) { + next = _PyForIter_NextWithIndex(iter_o, null_or_index); + if (PyStackRef_IsNull(next)) { + null_or_index = PyStackRef_TagInt(-1); + JUMPBY(oparg + 1); + DISPATCH(); + } + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + } + else { + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + if (next_o == NULL) { + if (_PyErr_Occurred(tstate)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { + ERROR_NO_POP(); + } + _PyEval_MonitorRaise(tstate, frame, this_instr); + _PyErr_Clear(tstate); } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); + /* iterator ended normally */ + assert(next_instr[oparg].op.code == END_FOR || + next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + /* Jump forward oparg, then skip following END_FOR */ + JUMPBY(oparg + 1); + DISPATCH(); } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Jump forward oparg, then skip following END_FOR */ - JUMPBY(oparg + 1); - DISPATCH(); + next = PyStackRef_FromPyObjectSteal(next_o); } - next = PyStackRef_FromPyObjectSteal(next_o); - // Common case: no jump, leave it to the code generator } - op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + EXIT_IF(!PyStackRef_IsNull(null_or_index)); PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { @@ -3156,63 +3183,63 @@ dummy_func( macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; - inst(INSTRUMENTED_FOR_ITER, (unused/1, iter -- iter, next)) { + inst(INSTRUMENTED_FOR_ITER, (unused/1, iter, null_or_index -- iter, null_or_index, next)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); + if (PyStackRef_IsTaggedInt(null_or_index)) { + next = _PyForIter_NextWithIndex(iter_o, null_or_index); + if (PyStackRef_IsNull(next)) { + JUMPBY(oparg + 1); + DISPATCH(); + } INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } else { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + if (next_o != NULL) { + next = PyStackRef_FromPyObjectSteal(next_o); + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); + } + else { + if (_PyErr_Occurred(tstate)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { + ERROR_NO_POP(); + } + _PyEval_MonitorRaise(tstate, frame, this_instr); + _PyErr_Clear(tstate); } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); + /* iterator ended normally */ + assert(next_instr[oparg].op.code == END_FOR || + next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + /* Skip END_FOR */ + JUMPBY(oparg + 1); + DISPATCH(); } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Skip END_FOR */ - JUMPBY(oparg + 1); - DISPATCH(); } } - op(_ITER_CHECK_LIST, (iter -- iter)) { + op(_ITER_CHECK_LIST, (iter, null_or_index -- iter, null_or_index)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - EXIT_IF(Py_TYPE(iter_o) != &PyListIter_Type); + EXIT_IF(Py_TYPE(iter_o) != &PyList_Type); + assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - EXIT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)); + EXIT_IF(!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)); #endif } - replaced op(_ITER_JUMP_LIST, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - assert(Py_TYPE(iter_o) == &PyListIter_Type); -// For free-threaded Python, the loop exit can happen at any point during -// item retrieval, so it doesn't make much sense to check and jump -// separately before item retrieval. Any length check we do here can be -// invalid by the time we actually try to fetch the item. + replaced op(_ITER_JUMP_LIST, (iter, null_or_index -- iter, null_or_index)) { #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - (void)iter_o; + // For free-threaded Python, the loop exit can happen at any point during + // item retrieval, so it doesn't make much sense to check and jump + // separately before item retrieval. Any length check we do here can be + // invalid by the time we actually try to fetch the item. #else - _PyListIterObject *it = (_PyListIterObject *)iter_o; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); STAT_INC(FOR_ITER, hit); - PyListObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (seq != NULL) { - it->it_seq = NULL; - Py_DECREF(seq); - } + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -3221,73 +3248,54 @@ dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_LIST, (iter -- iter)) { + op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) { #ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - EXIT_IF(seq == NULL); - if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - EXIT_IF(1); - } + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); + EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)); #endif } - replaced op(_ITER_NEXT_LIST, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread(list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); // A negative result means we lost a race with another thread // and we need to take the slow path. DEOPT_IF(result < 0); if (result == 0) { - it->it_index = -1; + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } // Only used by Tier 2 - op(_ITER_NEXT_LIST_TIER_TWO, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + op(_ITER_NEXT_LIST_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread((PyObject *)list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); // A negative result means we lost a race with another thread // and we need to take the slow path. - EXIT_IF(result < 0); - if (result == 0) { - it->it_index = -1; - EXIT_IF(1); - } - it->it_index++; + DEOPT_IF(result <= 0); #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + assert(PyStackRef_UntagInt(null_or_index) < PyList_GET_SIZE(list_o)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } macro(FOR_ITER_LIST) = @@ -3296,31 +3304,19 @@ dummy_func( _ITER_JUMP_LIST + _ITER_NEXT_LIST; - op(_ITER_CHECK_TUPLE, (iter -- iter)) { + op(_ITER_CHECK_TUPLE, (iter, null_or_index -- iter, null_or_index)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - EXIT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); -#endif + EXIT_IF(Py_TYPE(iter_o) != &PyTuple_Type); + assert(PyStackRef_IsTaggedInt(null_or_index)); } - replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - (void)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; + replaced op(_ITER_JUMP_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + (void)tuple_o; + assert(Py_TYPE(tuple_o) == &PyTuple_Type); STAT_INC(FOR_ITER, hit); - PyTupleObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { -#ifndef Py_GIL_DISABLED - if (seq != NULL) { - it->it_seq = NULL; - Py_DECREF(seq); - } -#endif + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -3328,29 +3324,19 @@ dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_TUPLE, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - PyTupleObject *seq = it->it_seq; - EXIT_IF(seq == NULL); - EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq)); + op(_GUARD_NOT_EXHAUSTED_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)); } - op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + op(_ITER_NEXT_TUPLE, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } macro(FOR_ITER_TUPLE) = @@ -3359,7 +3345,7 @@ dummy_func( _ITER_JUMP_TUPLE + _ITER_NEXT_TUPLE; - op(_ITER_CHECK_RANGE, (iter -- iter)) { + op(_ITER_CHECK_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); EXIT_IF(Py_TYPE(r) != &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3367,7 +3353,7 @@ dummy_func( #endif } - replaced op(_ITER_JUMP_RANGE, (iter -- iter)) { + replaced op(_ITER_JUMP_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3382,13 +3368,13 @@ dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_RANGE, (iter -- iter)) { + op(_GUARD_NOT_EXHAUSTED_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); EXIT_IF(r->len <= 0); } - op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3409,7 +3395,7 @@ dummy_func( _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; - op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) { + op(_FOR_ITER_GEN_FRAME, (iter, null -- iter, null, gen_frame: _PyInterpreterFrame*)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); #ifdef Py_GIL_DISABLED diff --git a/Python/ceval.c b/Python/ceval.c index d1b6b9f1a1a35f..309c4567f20e0e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3425,6 +3425,26 @@ _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *na return value; } +_PyStackRef +_PyForIter_NextWithIndex(PyObject *seq, _PyStackRef index) +{ + assert(PyStackRef_IsTaggedInt(index)); + assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq)); + intptr_t i = PyStackRef_UntagInt(index); + if (PyTuple_CheckExact(seq)) { + size_t size = PyTuple_GET_SIZE(seq); + if ((size_t)i >= size) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, i)); + } + PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i); + if (item == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectSteal(item); +} + /* Check if a 'cls' provides the given special method. */ static inline int type_has_special_method(PyTypeObject *cls, PyObject *name) diff --git a/Python/codegen.c b/Python/codegen.c index 683601103ec99d..0023d72cd5e91d 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -525,6 +525,15 @@ codegen_unwind_fblock(compiler *c, location *ploc, return SUCCESS; case COMPILE_FBLOCK_FOR_LOOP: + /* Pop the iterator */ + if (preserve_tos) { + ADDOP_I(c, *ploc, SWAP, 3); + } + ADDOP(c, *ploc, POP_TOP); + ADDOP(c, *ploc, POP_TOP); + return SUCCESS; + + case COMPILE_FBLOCK_ASYNC_FOR_LOOP: /* Pop the iterator */ if (preserve_tos) { ADDOP_I(c, *ploc, SWAP, 2); @@ -629,7 +638,8 @@ codegen_unwind_fblock_stack(compiler *c, location *ploc, c, *ploc, "'break', 'continue' and 'return' cannot appear in an except* block"); } if (loop != NULL && (top->fb_type == COMPILE_FBLOCK_WHILE_LOOP || - top->fb_type == COMPILE_FBLOCK_FOR_LOOP)) { + top->fb_type == COMPILE_FBLOCK_FOR_LOOP || + top->fb_type == COMPILE_FBLOCK_ASYNC_FOR_LOOP)) { *loop = top; return SUCCESS; } @@ -2125,7 +2135,7 @@ codegen_async_for(compiler *c, stmt_ty s) ADDOP(c, LOC(s->v.AsyncFor.iter), GET_AITER); USE_LABEL(c, start); - RETURN_IF_ERROR(_PyCompile_PushFBlock(c, loc, COMPILE_FBLOCK_FOR_LOOP, start, end, NULL)); + RETURN_IF_ERROR(_PyCompile_PushFBlock(c, loc, COMPILE_FBLOCK_ASYNC_FOR_LOOP, start, end, NULL)); /* SETUP_FINALLY to guard the __anext__ call */ ADDOP_JUMP(c, loc, SETUP_FINALLY, except); @@ -2142,7 +2152,7 @@ codegen_async_for(compiler *c, stmt_ty s) /* Mark jump as artificial */ ADDOP_JUMP(c, NO_LOCATION, JUMP, start); - _PyCompile_PopFBlock(c, COMPILE_FBLOCK_FOR_LOOP, start); + _PyCompile_PopFBlock(c, COMPILE_FBLOCK_ASYNC_FOR_LOOP, start); /* Except block for __anext__ */ USE_LABEL(c, except); @@ -3895,10 +3905,11 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) NEW_JUMP_TARGET_LABEL(c, loop); NEW_JUMP_TARGET_LABEL(c, cleanup); + ADDOP(c, loc, PUSH_NULL); // Push NULL index for loop USE_LABEL(c, loop); ADDOP_JUMP(c, loc, FOR_ITER, cleanup); if (const_oparg == CONSTANT_BUILTIN_TUPLE) { - ADDOP_I(c, loc, LIST_APPEND, 2); + ADDOP_I(c, loc, LIST_APPEND, 3); ADDOP_JUMP(c, loc, JUMP, loop); } else { @@ -4442,13 +4453,12 @@ codegen_sync_comprehension_generator(compiler *c, location loc, } if (IS_JUMP_TARGET_LABEL(start)) { VISIT(c, expr, gen->iter); - ADDOP(c, LOC(gen->iter), GET_ITER); } } } if (IS_JUMP_TARGET_LABEL(start)) { - depth++; + depth += 2; ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); @@ -4543,9 +4553,9 @@ codegen_async_comprehension_generator(compiler *c, location loc, else { /* Sub-iter - calculate on the fly */ VISIT(c, expr, gen->iter); - ADDOP(c, LOC(gen->iter), GET_AITER); } } + ADDOP(c, LOC(gen->iter), GET_AITER); USE_LABEL(c, start); /* Runtime will push a block here, so we need to account for that */ @@ -4757,19 +4767,6 @@ pop_inlined_comprehension_state(compiler *c, location loc, return SUCCESS; } -static inline int -codegen_comprehension_iter(compiler *c, comprehension_ty comp) -{ - VISIT(c, expr, comp->iter); - if (comp->is_async) { - ADDOP(c, LOC(comp->iter), GET_AITER); - } - else { - ADDOP(c, LOC(comp->iter), GET_ITER); - } - return SUCCESS; -} - static int codegen_comprehension(compiler *c, expr_ty e, int type, identifier name, asdl_comprehension_seq *generators, expr_ty elt, @@ -4789,9 +4786,7 @@ codegen_comprehension(compiler *c, expr_ty e, int type, outermost = (comprehension_ty) asdl_seq_GET(generators, 0); if (is_inlined) { - if (codegen_comprehension_iter(c, outermost)) { - goto error; - } + VISIT(c, expr, outermost->iter); if (push_inlined_comprehension_state(c, loc, entry, &inline_state)) { goto error; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 40090e692e4a72..35b29940cb4a15 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -534,7 +534,7 @@ stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); break; } @@ -577,6 +577,20 @@ break; } + case _POP_ITER: { + _PyStackRef index_or_null; + _PyStackRef iter; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iter); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _END_SEND: { _PyStackRef value; _PyStackRef receiver; @@ -4172,25 +4186,37 @@ case _GET_ITER: { _PyStackRef iterable; _PyStackRef iter; + _PyStackRef index_or_null; iterable = stack_pointer[-1]; #ifdef Py_STATS _PyFrame_SetStackPointer(frame, stack_pointer); _Py_GatherStats_GetIter(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); #endif - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(iterable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (iter_o == NULL) { - JUMP_TO_ERROR(); + + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + index_or_null = PyStackRef_TagInt(0); } - iter = PyStackRef_FromPyObjectSteal(iter_o); - stack_pointer[0] = iter; + else { + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iterable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (iter_o == NULL) { + JUMP_TO_ERROR(); + } + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + stack_pointer += 1; + } + stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -4237,10 +4263,16 @@ /* _FOR_ITER is not a viable micro-op for tier 2 because it is replaced */ case _FOR_ITER_TIER_TWO: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + if (!PyStackRef_IsNull(null_or_index)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4272,21 +4304,18 @@ /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 because it is instrumented */ case _ITER_CHECK_LIST: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyListIter_Type) { + if (Py_TYPE(iter_o) != &PyList_Type) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)) { + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -4297,24 +4326,17 @@ /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 because it is replaced */ case _GUARD_NOT_EXHAUSTED_LIST: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; #ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - if (seq == NULL) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (1) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } #endif break; } @@ -4322,38 +4344,30 @@ /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 because it is replaced */ case _ITER_NEXT_LIST_TIER_TWO: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread((PyObject *)list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); stack_pointer = _PyFrame_GetStackPointer(frame); - if (result < 0) { + if (result <= 0) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (result == 0) { - it->it_index = -1; - if (1) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + assert(PyStackRef_UntagInt(null_or_index) < PyList_GET_SIZE(list_o)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -4361,39 +4375,29 @@ } case _ITER_CHECK_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyTupleIter_Type) { + if (Py_TYPE(iter_o) != &PyTuple_Type) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - #endif + assert(PyStackRef_IsTaggedInt(null_or_index)); break; } /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 because it is replaced */ case _GUARD_NOT_EXHAUSTED_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - PyTupleObject *seq = it->it_seq; - if (seq == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (it->it_index >= PyTuple_GET_SIZE(seq)) { + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -4401,19 +4405,18 @@ } case _ITER_NEXT_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -4422,7 +4425,7 @@ case _ITER_CHECK_RANGE: { _PyStackRef iter; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(r) != &PyRangeIter_Type) { UOP_STAT_INC(uopcode, miss); @@ -4441,7 +4444,7 @@ case _GUARD_NOT_EXHAUSTED_RANGE: { _PyStackRef iter; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); if (r->len <= 0) { @@ -4454,7 +4457,7 @@ case _ITER_NEXT_RANGE: { _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -4479,7 +4482,7 @@ _PyStackRef iter; _PyInterpreterFrame *gen_frame; oparg = CURRENT_OPARG(); - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(gen) != &PyGen_Type) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 78ef02a911a72b..67ccf350b72ed6 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -299,26 +299,34 @@ basicblock_returns(const basicblock *b) { } static void -dump_basicblock(const basicblock *b) +dump_basicblock(const basicblock *b, bool highlight) { const char *b_return = basicblock_returns(b) ? "return " : ""; + if (highlight) { + fprintf(stderr, ">>> "); + } fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n", b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused, b->b_startdepth, b->b_predecessors, b_return); + int depth = b->b_startdepth; if (b->b_instr) { int i; for (i = 0; i < b->b_iused; i++) { - fprintf(stderr, " [%02d] ", i); + fprintf(stderr, " [%02d] depth: %d ", i, depth); dump_instr(b->b_instr + i); + + int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg); + int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg); + depth += (pushed - popped); } } } void -_PyCfgBuilder_DumpGraph(const basicblock *entryblock) +_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark) { for (const basicblock *b = entryblock; b != NULL; b = b->b_next) { - dump_basicblock(b); + dump_basicblock(b, b == mark); } } @@ -2863,6 +2871,7 @@ optimize_load_fast(cfg_builder *g) // Opcodes that consume no inputs case GET_ANEXT: + case GET_ITER: case GET_LEN: case IMPORT_FROM: case MATCH_KEYS: diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1c98e1ce4fcf5c..5be2671700aee7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5731,17 +5731,19 @@ _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; // _SPECIALIZE_FOR_ITER { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH_SAME_OPARG(); } @@ -5752,29 +5754,44 @@ // _FOR_ITER { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); + if (PyStackRef_IsTaggedInt(null_or_index)) { + _PyFrame_SetStackPointer(frame, stack_pointer); + next = _PyForIter_NextWithIndex(iter_o, null_or_index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(next)) { + null_or_index = PyStackRef_TagInt(-1); + JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; + DISPATCH(); + } + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + } + else { + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (next_o == NULL) { + if (_PyErr_Occurred(tstate)) { + _PyFrame_SetStackPointer(frame, stack_pointer); + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!matches) { + JUMP_TO_LABEL(error); + } + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyEval_MonitorRaise(tstate, frame, this_instr); + _PyErr_Clear(tstate); + stack_pointer = _PyFrame_GetStackPointer(frame); } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + assert(next_instr[oparg].op.code == END_FOR || + next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + JUMPBY(oparg + 1); + DISPATCH(); } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - JUMPBY(oparg + 1); - DISPATCH(); + next = PyStackRef_FromPyObjectSteal(next_o); } - next = PyStackRef_FromPyObjectSteal(next_o); } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -5806,7 +5823,7 @@ } // _FOR_ITER_GEN_FRAME { - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(gen) != &PyGen_Type) { UPDATE_MISS_STATS(FOR_ITER); @@ -5863,26 +5880,22 @@ INSTRUCTION_STATS(FOR_ITER_LIST); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ // _ITER_CHECK_LIST { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyListIter_Type) { + if (Py_TYPE(iter_o) != &PyList_Type) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); } + assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UPDATE_MISS_STATS(FOR_ITER); - assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); - JUMP_TO_PREDICTED(FOR_ITER); - } - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)) { + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); @@ -5891,42 +5904,30 @@ } // _ITER_JUMP_LIST { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - assert(Py_TYPE(iter_o) == &PyListIter_Type); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - (void)iter_o; + #else - _PyListIterObject *it = (_PyListIterObject *)iter_o; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); STAT_INC(FOR_ITER, hit); - PyListObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (seq != NULL) { - it->it_seq = NULL; - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_DECREF(seq); - stack_pointer = _PyFrame_GetStackPointer(frame); - } + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } #endif } // _ITER_NEXT_LIST { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread(list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); stack_pointer = _PyFrame_GetStackPointer(frame); if (result < 0) { UPDATE_MISS_STATS(FOR_ITER); @@ -5934,16 +5935,17 @@ JUMP_TO_PREDICTED(FOR_ITER); } if (result == 0) { - it->it_index = -1; + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -5966,7 +5968,7 @@ /* Skip 1 cache entry */ // _ITER_CHECK_RANGE { - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(r) != &PyRangeIter_Type) { UPDATE_MISS_STATS(FOR_ITER); @@ -6029,63 +6031,44 @@ INSTRUCTION_STATS(FOR_ITER_TUPLE); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ // _ITER_CHECK_TUPLE { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyTupleIter_Type) { + if (Py_TYPE(iter_o) != &PyTuple_Type) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); } - #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UPDATE_MISS_STATS(FOR_ITER); - assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); - JUMP_TO_PREDICTED(FOR_ITER); - } - #endif + assert(PyStackRef_IsTaggedInt(null_or_index)); } // _ITER_JUMP_TUPLE { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - (void)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + (void)tuple_o; + assert(Py_TYPE(tuple_o) == &PyTuple_Type); STAT_INC(FOR_ITER, hit); - PyTupleObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { - #ifndef Py_GIL_DISABLED - if (seq != NULL) { - it->it_seq = NULL; - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_DECREF(seq); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - #endif - + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } } // _ITER_NEXT_TUPLE { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -6216,25 +6199,37 @@ INSTRUCTION_STATS(GET_ITER); _PyStackRef iterable; _PyStackRef iter; + _PyStackRef index_or_null; iterable = stack_pointer[-1]; #ifdef Py_STATS _PyFrame_SetStackPointer(frame, stack_pointer); _Py_GatherStats_GetIter(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); #endif - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(iterable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (iter_o == NULL) { - JUMP_TO_LABEL(error); + + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + index_or_null = PyStackRef_TagInt(0); } - iter = PyStackRef_FromPyObjectSteal(iter_o); - stack_pointer[0] = iter; + else { + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iterable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (iter_o == NULL) { + JUMP_TO_LABEL(error); + } + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + stack_pointer += 1; + } + stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); DISPATCH(); @@ -6999,7 +6994,7 @@ _PyStackRef receiver; _PyStackRef value; value = stack_pointer[-1]; - receiver = stack_pointer[-2]; + receiver = stack_pointer[-3]; if (PyStackRef_GenCheck(receiver)) { _PyFrame_SetStackPointer(frame, stack_pointer); int err = monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value)); @@ -7061,34 +7056,48 @@ next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_FOR_ITER); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); + if (PyStackRef_IsTaggedInt(null_or_index)) { + _PyFrame_SetStackPointer(frame, stack_pointer); + next = _PyForIter_NextWithIndex(iter_o, null_or_index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(next)) { + JUMPBY(oparg + 1); + DISPATCH(); + } INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } else { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (next_o != NULL) { + next = PyStackRef_FromPyObjectSteal(next_o); + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); + } + else { + if (_PyErr_Occurred(tstate)) { + _PyFrame_SetStackPointer(frame, stack_pointer); + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!matches) { + JUMP_TO_LABEL(error); + } + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyEval_MonitorRaise(tstate, frame, this_instr); + _PyErr_Clear(tstate); + stack_pointer = _PyFrame_GetStackPointer(frame); } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + assert(next_instr[oparg].op.code == END_FOR || + next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + JUMPBY(oparg + 1); + DISPATCH(); } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - JUMPBY(oparg + 1); - DISPATCH(); } stack_pointer[0] = next; stack_pointer += 1; @@ -7356,9 +7365,12 @@ next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_POP_ITER); _PyStackRef iter; - iter = stack_pointer[-1]; + _PyStackRef index_or_null; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; INSTRUMENTED_JUMP(prev_instr, this_instr+1, PY_MONITORING_EVENT_BRANCH_RIGHT); - stack_pointer += -1; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iter); @@ -10122,12 +10134,15 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(POP_ITER); - _PyStackRef value; - value = stack_pointer[-1]; - stack_pointer += -1; + _PyStackRef iter; + _PyStackRef index_or_null; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_CLOSE(iter); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH(); } @@ -10275,7 +10290,7 @@ stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 49c6bfb6c1b01a..34250fd4385d34 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -840,7 +840,7 @@ dummy_func(void) { value = sym_new_unknown(ctx); } - op(_FOR_ITER_GEN_FRAME, (unused -- unused, gen_frame: _Py_UOpsAbstractFrame*)) { + op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame: _Py_UOpsAbstractFrame*)) { gen_frame = NULL; /* We are about to hit the end of the trace */ ctx->done = true; @@ -914,7 +914,7 @@ dummy_func(void) { } } - op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) { next = sym_new_type(ctx, &PyLong_Type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index bf7ac72d4579e7..bbd45067103679 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -126,6 +126,12 @@ break; } + case _POP_ITER: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _END_SEND: { JitOptSymbol *val; val = sym_new_not_null(ctx); @@ -1557,8 +1563,13 @@ case _GET_ITER: { JitOptSymbol *iter; + JitOptSymbol *index_or_null; iter = sym_new_not_null(ctx); + index_or_null = sym_new_not_null(ctx); stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/specialize.c b/Python/specialize.c index 06995d46d8b5e0..92f79d39d55208 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2904,53 +2904,57 @@ int #endif // Py_STATS Py_NO_INLINE void -_Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) +_Py_Specialize_ForIter(_PyStackRef iter, _PyStackRef null_or_index, _Py_CODEUNIT *instr, int oparg) { assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER); PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); PyTypeObject *tp = Py_TYPE(iter_o); + + if (PyStackRef_IsNull(null_or_index)) { #ifdef Py_GIL_DISABLED - // Only specialize for uniquely referenced iterators, so that we know - // they're only referenced by this one thread. This is more limiting - // than we need (even `it = iter(mylist); for item in it:` won't get - // specialized) but we don't have a way to check whether we're the only - // _thread_ who has access to the object. - if (!_PyObject_IsUniquelyReferenced(iter_o)) - goto failure; -#endif - if (tp == &PyListIter_Type) { -#ifdef Py_GIL_DISABLED - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) && - !_PyObject_GC_IS_SHARED(it->it_seq)) { - // Maybe this should just set GC_IS_SHARED in a critical - // section, instead of leaving it to the first iteration? + // Only specialize for uniquely referenced iterators, so that we know + // they're only referenced by this one thread. This is more limiting + // than we need (even `it = iter(mylist); for item in it:` won't get + // specialized) but we don't have a way to check whether we're the only + // _thread_ who has access to the object. + if (!_PyObject_IsUniquelyReferenced(iter_o)) { goto failure; } #endif - specialize(instr, FOR_ITER_LIST); - return; - } - else if (tp == &PyTupleIter_Type) { - specialize(instr, FOR_ITER_TUPLE); - return; - } - else if (tp == &PyRangeIter_Type) { - specialize(instr, FOR_ITER_RANGE); - return; + if (tp == &PyRangeIter_Type) { + specialize(instr, FOR_ITER_RANGE); + return; + } + else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { + // Generators are very much not thread-safe, so don't worry about + // the specialization not being thread-safe. + assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || + instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR + ); + /* Don't specialize if PEP 523 is active */ + if (_PyInterpreterState_GET()->eval_frame) { + goto failure; + } + specialize(instr, FOR_ITER_GEN); + return; + } } - else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { - // Generators are very much not thread-safe, so don't worry about - // the specialization not being thread-safe. - assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || - instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR - ); - /* Don't specialize if PEP 523 is active */ - if (_PyInterpreterState_GET()->eval_frame) - goto failure; - specialize(instr, FOR_ITER_GEN); - return; + else { + if (tp == &PyList_Type) { +#ifdef Py_GIL_DISABLED + // Only specialize for lists owned by this thread or shared + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { + goto failure; + } +#endif + specialize(instr, FOR_ITER_LIST); + return; + } + else if (tp == &PyTuple_Type) { + specialize(instr, FOR_ITER_TUPLE); + return; + } } failure: SPECIALIZATION_FAIL(FOR_ITER, diff --git a/Python/stackrefs.c b/Python/stackrefs.c index 69d4e8b943159f..b2a1369031ad2b 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -216,4 +216,12 @@ PyStackRef_IsNullOrInt(_PyStackRef ref) return PyStackRef_IsNull(ref) || PyStackRef_IsTaggedInt(ref); } +_PyStackRef +PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) +{ + assert(ref.index <= INT_MAX - 2); // No overflow + return (_PyStackRef){ .index = ref.index + 2 }; +} + + #endif diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 96d64a27e0a6ed..3070559db8ae57 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -679,8 +679,11 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsTaggedInt", "PyStackRef_TagInt", "PyStackRef_UntagInt", + "PyStackRef_IncrementTaggedIntNoOverflow", + "PyStackRef_IsNullOrInt", ) + def check_escaping_calls(instr: parser.CodeDef, escapes: dict[SimpleStmt, EscapingCall]) -> None: error: lexer.Token | None = None calls = {e.call for e in escapes.values()} diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 9e60d219a71c4a..02f9a952754e66 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -140,6 +140,7 @@ def dispatch( ) -> bool: if storage.spilled: raise analysis_error("stack_pointer needs reloading before dispatch", tkn) + storage.stack.flush(self.out) self.emit(tkn) return False From c3c88064f5905889057dcdfaaaa4a48f8e2d53d4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 27 May 2025 17:38:30 +0200 Subject: [PATCH 376/989] gh-133678: Document C API third party tools (#134526) Co-authored-by: Petr Viktorin Co-authored-by: Steve Dower --- Doc/c-api/intro.rst | 38 ++++++++++++++++++++++++++++++++++++++ Doc/extending/index.rst | 16 +++------------- Doc/faq/extending.rst | 21 +++------------------ Doc/howto/cporting.rst | 6 ++---- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 112b141032f55a..41856922110eee 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -838,3 +838,41 @@ after every statement run by the interpreter.) Please refer to :file:`Misc/SpecialBuilds.txt` in the Python source distribution for more detailed information. + + +.. _c-api-tools: + +Recommended third party tools +============================= + +The following third party tools offer both simpler and more sophisticated +approaches to creating C, C++ and Rust extensions for Python: + +* `Cython `_ +* `cffi `_ +* `HPy `_ +* `nanobind `_ (C++) +* `Numba `_ +* `pybind11 `_ (C++) +* `PyO3 `_ (Rust) +* `SWIG `_ + +Using tools such as these can help avoid writing code that is tightly bound to +a particular version of CPython, avoid reference counting errors, and focus +more on your own code than on using the CPython API. In general, new versions +of Python can be supported by updating the tool, and your code will often use +newer and more efficient APIs automatically. Some tools also support compiling +for other implementations of Python from a single set of sources. + +These projects are not supported by the same people who maintain Python, and +issues need to be raised with the projects directly. Remember to check that the +project is still maintained and supported, as the list above may become +outdated. + +.. seealso:: + + `Python Packaging User Guide: Binary Extensions `_ + The Python Packaging User Guide not only covers several available + tools that simplify the creation of binary extensions, but also + discusses the various reasons why creating an extension module may be + desirable in the first place. diff --git a/Doc/extending/index.rst b/Doc/extending/index.rst index c1d8eb54ec1859..4cc2c96d8d5b47 100644 --- a/Doc/extending/index.rst +++ b/Doc/extending/index.rst @@ -26,19 +26,9 @@ Recommended third party tools ============================= This guide only covers the basic tools for creating extensions provided -as part of this version of CPython. Third party tools like -`Cython `_, `cffi `_, -`SWIG `_ and `Numba `_ -offer both simpler and more sophisticated approaches to creating C and C++ -extensions for Python. - -.. seealso:: - - `Python Packaging User Guide: Binary Extensions `_ - The Python Packaging User Guide not only covers several available - tools that simplify the creation of binary extensions, but also - discusses the various reasons why creating an extension module may be - desirable in the first place. +as part of this version of CPython. Some :ref:`third party tools +` offer both simpler and more sophisticated approaches to creating +C and C++ extensions for Python. Creating extensions without third party tools diff --git a/Doc/faq/extending.rst b/Doc/faq/extending.rst index 3147fda7c37124..1d5abed2317b0c 100644 --- a/Doc/faq/extending.rst +++ b/Doc/faq/extending.rst @@ -37,24 +37,9 @@ Writing C is hard; are there any alternatives? ---------------------------------------------- There are a number of alternatives to writing your own C extensions, depending -on what you're trying to do. - -.. XXX make sure these all work - -`Cython `_ and its relative `Pyrex -`_ are compilers -that accept a slightly modified form of Python and generate the corresponding -C code. Cython and Pyrex make it possible to write an extension without having -to learn Python's C API. - -If you need to interface to some C or C++ library for which no Python extension -currently exists, you can try wrapping the library's data types and functions -with a tool such as `SWIG `_. `SIP -`__, `CXX -`_ `Boost -`_, or `Weave -`_ are also -alternatives for wrapping C++ libraries. +on what you're trying to do. :ref:`Recommended third party tools ` +offer both simpler and more sophisticated approaches to creating C and C++ +extensions for Python. How can I execute arbitrary Python statements from C? diff --git a/Doc/howto/cporting.rst b/Doc/howto/cporting.rst index 7773620b40b973..cf857aed0425ec 100644 --- a/Doc/howto/cporting.rst +++ b/Doc/howto/cporting.rst @@ -14,13 +14,11 @@ We recommend the following resources for porting extension modules to Python 3: module. * The `Porting guide`_ from the *py3c* project provides opinionated suggestions with supporting code. -* The `Cython`_ and `CFFI`_ libraries offer abstractions over - Python's C API. +* :ref:`Recommended third party tools ` offer abstractions over + the Python's C API. Extensions generally need to be re-written to use one of them, but the library then handles differences between various Python versions and implementations. .. _Migrating C extensions: http://python3porting.com/cextensions.html .. _Porting guide: https://py3c.readthedocs.io/en/latest/guide.html -.. _Cython: https://cython.org/ -.. _CFFI: https://cffi.readthedocs.io/en/latest/ From 9b5e80000ee179eb028841709f10dac9af7c36e7 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 27 May 2025 09:42:24 -0600 Subject: [PATCH 377/989] gh-132775: Always Set __builtins__ In _PyFunction_FromXIData() (gh-134758) This is a small follow-up to gh-133481. There's a corner case in the behavior of PyImport_ImportModuleAttrString(), where it expects __builtins__ to be set if __globals__ is set. --- Python/crossinterp_data_lookup.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index 88eb41da89ee40..b16f38b847fc66 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -701,6 +701,14 @@ _PyFunction_FromXIData(_PyXIData_t *xidata) Py_DECREF(code); return NULL; } + PyThreadState *tstate = _PyThreadState_GET(); + if (PyDict_SetItem(globals, &_Py_ID(__builtins__), + tstate->interp->builtins) < 0) + { + Py_DECREF(code); + Py_DECREF(globals); + return NULL; + } PyObject *func = PyFunction_New(code, globals); Py_DECREF(code); Py_DECREF(globals); From 3f9eb55e090a8de80503e565f508f341c5f4c8da Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Tue, 27 May 2025 09:02:17 -0700 Subject: [PATCH 378/989] gh-134262: increase retries in `Tools/build/generate_sbom.py` (#134558) --- Tools/build/generate_sbom.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 5845f2d85c7fdb..ecb7b54f6d8a13 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -4,6 +4,7 @@ import hashlib import json import os +import random import re import subprocess import sys @@ -164,16 +165,18 @@ def get_externals() -> list[str]: def download_with_retries(download_location: str, - max_retries: int = 5, - base_delay: float = 2.0) -> typing.Any: + max_retries: int = 7, + base_delay: float = 2.25, + max_jitter: float = 1.0) -> typing.Any: """Download a file with exponential backoff retry.""" for attempt in range(max_retries): try: resp = urllib.request.urlopen(download_location) except urllib.error.URLError as ex: if attempt == max_retries: - raise ex - time.sleep(base_delay**attempt) + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(base_delay**attempt + random.uniform(0, max_jitter)) else: return resp From a380d578737be1cd51e1d1be2b83bbc0b0619e7e Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 27 May 2025 22:28:27 +0530 Subject: [PATCH 379/989] gh-134043: use stackrefs in vectorcalling methods (#134044) Adds `_PyObject_GetMethodStackRef` which uses stackrefs and takes advantage of deferred reference counting in free-threading while calling method objects in vectorcall. --- Include/internal/pycore_object.h | 3 + Objects/call.c | 35 +++++--- Objects/object.c | 110 +++++++++++++++++++++++++ Tools/ftscalingbench/ftscalingbench.py | 13 +++ 4 files changed, 148 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index b7e162c8abcabf..3aaee7d008155a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -897,6 +897,9 @@ extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, extern unsigned int _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out); +extern int _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, + PyObject *name, _PyStackRef *method); + // Cache the provided init method in the specialization cache of type if the // provided type version matches the current version of the type. // diff --git a/Objects/call.c b/Objects/call.c index b1610dababd466..c9a18bcc3da60b 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -834,12 +834,15 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, assert(PyVectorcall_NARGS(nargsf) >= 1); PyThreadState *tstate = _PyThreadState_GET(); - PyObject *callable = NULL; + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); /* Use args[0] as "self" argument */ - int unbound = _PyObject_GetMethod(args[0], name, &callable); - if (callable == NULL) { + int unbound = _PyObject_GetMethodStackRef(tstate, args[0], name, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); if (unbound) { /* We must remove PY_VECTORCALL_ARGUMENTS_OFFSET since @@ -855,7 +858,7 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable); PyObject *result = _PyObject_VectorcallTstate(tstate, callable, args, nargsf, kwnames); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } @@ -868,11 +871,14 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...) return null_error(tstate); } - PyObject *callable = NULL; - int is_method = _PyObject_GetMethod(obj, name, &callable); - if (callable == NULL) { + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); + int is_method = _PyObject_GetMethodStackRef(tstate, obj, name, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); obj = is_method ? obj : NULL; va_list vargs; @@ -880,7 +886,7 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...) PyObject *result = object_vacall(tstate, obj, callable, vargs); va_end(vargs); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } @@ -897,12 +903,15 @@ _PyObject_CallMethodIdObjArgs(PyObject *obj, _Py_Identifier *name, ...) if (!oname) { return NULL; } - - PyObject *callable = NULL; - int is_method = _PyObject_GetMethod(obj, oname, &callable); - if (callable == NULL) { + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); + int is_method = _PyObject_GetMethodStackRef(tstate, obj, oname, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); + obj = is_method ? obj : NULL; va_list vargs; @@ -910,7 +919,7 @@ _PyObject_CallMethodIdObjArgs(PyObject *obj, _Py_Identifier *name, ...) PyObject *result = object_vacall(tstate, obj, callable, vargs); va_end(vargs); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } diff --git a/Objects/object.c b/Objects/object.c index af1aa217f75462..68c8bfeae33e33 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1664,6 +1664,116 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) return 0; } +int +_PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, + PyObject *name, _PyStackRef *method) +{ + int meth_found = 0; + + assert(PyStackRef_IsNull(*method)); + + PyTypeObject *tp = Py_TYPE(obj); + if (!_PyType_IsReady(tp)) { + if (PyType_Ready(tp) < 0) { + return 0; + } + } + + if (tp->tp_getattro != PyObject_GenericGetAttr || !PyUnicode_CheckExact(name)) { + PyObject *res = PyObject_GetAttr(obj, name); + if (res != NULL) { + *method = PyStackRef_FromPyObjectSteal(res); + } + return 0; + } + + _PyType_LookupStackRefAndVersion(tp, name, method); + PyObject *descr = PyStackRef_AsPyObjectBorrow(*method); + descrgetfunc f = NULL; + if (descr != NULL) { + if (_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) { + meth_found = 1; + } + else { + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); + PyStackRef_CLEAR(*method); + if (value != NULL) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + } + } + PyObject *dict, *attr; + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && + _PyObject_TryGetInstanceAttribute(obj, name, &attr)) { + if (attr != NULL) { + PyStackRef_CLEAR(*method); + *method = PyStackRef_FromPyObjectSteal(attr); + return 0; + } + dict = NULL; + } + else if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { + dict = (PyObject *)_PyObject_GetManagedDict(obj); + } + else { + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr != NULL) { + dict = FT_ATOMIC_LOAD_PTR_ACQUIRE(*dictptr); + } + else { + dict = NULL; + } + } + if (dict != NULL) { + // TODO: use _Py_dict_lookup_threadsafe_stackref + Py_INCREF(dict); + PyObject *value; + if (PyDict_GetItemRef(dict, name, &value) != 0) { + // found or error + Py_DECREF(dict); + PyStackRef_CLEAR(*method); + if (value != NULL) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + // not found + Py_DECREF(dict); + } + + if (meth_found) { + assert(!PyStackRef_IsNull(*method)); + return 1; + } + + if (f != NULL) { + PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); + PyStackRef_CLEAR(*method); + if (value) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + + if (descr != NULL) { + assert(!PyStackRef_IsNull(*method)); + return 0; + } + + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + tp->tp_name, name); + + _PyObject_SetAttributeErrorContext(obj, name); + assert(PyStackRef_IsNull(*method)); + return 0; +} + + /* Generic GetAttr functions - put these in your tp_[gs]etattro slot. */ PyObject * diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 926bc66b944c6f..1a59e25189d5dd 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -27,6 +27,7 @@ import sys import threading import time +from operator import methodcaller # The iterations in individual benchmarks are scaled by this factor. WORK_SCALE = 100 @@ -188,6 +189,18 @@ def thread_local_read(): _ = tmp.x _ = tmp.x +class MyClass: + __slots__ = () + + def func(self): + pass + +@register_benchmark +def method_caller(): + mc = methodcaller("func") + obj = MyClass() + for i in range(1000 * WORK_SCALE): + mc(obj) def bench_one_thread(func): t0 = time.perf_counter_ns() From ac539e7e0d1622c61988f30c91acfd4c06521ced Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 27 May 2025 22:42:08 +0530 Subject: [PATCH 380/989] gh-132917: fix data race on `last_mem` in free-threading gc (#134692) --- Python/gc_free_threading.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d2ea5b5e06ba43..5aaa68c5b51f95 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2062,7 +2062,7 @@ gc_should_collect_mem_usage(GCState *gcstate) // 70,000 new container objects. return true; } - Py_ssize_t last_mem = gcstate->last_mem; + Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem); Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128); if ((mem - last_mem) > mem_threshold) { // The process memory usage has increased too much, do a collection. @@ -2245,7 +2245,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Store the current memory usage, can be smaller now if breaking cycles // freed some memory. - state->gcstate->last_mem = get_process_mem_usage(); + Py_ssize_t last_mem = get_process_mem_usage(); + _Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem); // Append objects with legacy finalizers to the "gc.garbage" list. handle_legacy_finalizers(state); From 79d81f7cba236e2cbdfa95d0de0a75ba9716ed32 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Tue, 27 May 2025 21:30:17 +0300 Subject: [PATCH 381/989] gh-131798: Optimize `_ITER_CHECK_TUPLE` (GH-134803) --- .../2025-05-27-20-21-34.gh-issue-131798.b32zkl.rst | 1 + Python/optimizer_bytecodes.c | 7 +++++++ Python/optimizer_cases.c.h | 6 ++++++ 3 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-21-34.gh-issue-131798.b32zkl.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-21-34.gh-issue-131798.b32zkl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-21-34.gh-issue-131798.b32zkl.rst new file mode 100644 index 00000000000000..ed4b31bd7bedce --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-21-34.gh-issue-131798.b32zkl.rst @@ -0,0 +1 @@ +Allow the JIT to remove unnecessary ``_ITER_CHECK_TUPLE`` ops. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 34250fd4385d34..e1209209660f92 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -914,6 +914,13 @@ dummy_func(void) { } } + op(_ITER_CHECK_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + if (sym_matches_type(iter, &PyTuple_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(iter, &PyTuple_Type); + } + op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) { next = sym_new_type(ctx, &PyLong_Type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index bbd45067103679..db86edcc7859b5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1615,6 +1615,12 @@ } case _ITER_CHECK_TUPLE: { + JitOptSymbol *iter; + iter = stack_pointer[-2]; + if (sym_matches_type(iter, &PyTuple_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(iter, &PyTuple_Type); break; } From 967f361993c9c97eb3ff3076a409b78ea32938df Mon Sep 17 00:00:00 2001 From: Rishabh Singh <67859818+rishabh11336@users.noreply.github.com> Date: Wed, 28 May 2025 00:18:04 +0530 Subject: [PATCH 382/989] gh-134789: Document del s[i] operation for mutable sequences (#134804) [main] Update stdtypes.rst - Added explicit mention of `del s[i]` (item deletion by index) to the Mutable Sequence Types section. - Clarified that this operation removes the item at the specified index from the sequence. - Addresses issue #134789. --- Doc/library/stdtypes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 31d71031bca12c..f0b4b09ff10dce 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1214,6 +1214,8 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). | ``s[i] = x`` | item *i* of *s* is replaced by | | | | *x* | | +------------------------------+--------------------------------+---------------------+ +| ``del s[i]`` | removes item *i* of *s* | | ++------------------------------+--------------------------------+---------------------+ | ``s[i:j] = t`` | slice of *s* from *i* to *j* | | | | is replaced by the contents of | | | | the iterable *t* | | From a4d37f88b66bc9a66b2ab277aa66a2a6b20821fa Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 27 May 2025 16:21:16 -0400 Subject: [PATCH 383/989] gh-134679: Fix assertion failure in QSBR (gh-134811) This is the same underlying bug as gh-130519. The destructor may call arbitrary code, changing the `tstate->qsbr pointer` and invalidating the old `struct _qsbr_thread_state`. --- ...-05-27-18-59-54.gh-issue-134679.FWPBu6.rst | 2 ++ Objects/obmalloc.c | 22 +++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-18-59-54.gh-issue-134679.FWPBu6.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-18-59-54.gh-issue-134679.FWPBu6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-18-59-54.gh-issue-134679.FWPBu6.rst new file mode 100644 index 00000000000000..22f1282fea13e9 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-18-59-54.gh-issue-134679.FWPBu6.rst @@ -0,0 +1,2 @@ +Fix crash in the :term:`free threading` build's QSBR code that could occur +when changing an object's ``__dict__`` attribute. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b209808da902da..d3931aab623b70 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1238,7 +1238,7 @@ work_queue_first(struct llist_node *head) } static void -process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, +process_queue(struct llist_node *head, _PyThreadStateImpl *tstate, bool keep_empty, delayed_dealloc_cb cb, void *state) { while (!llist_empty(head)) { @@ -1246,7 +1246,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, if (buf->rd_idx < buf->wr_idx) { struct _mem_work_item *item = &buf->array[buf->rd_idx]; - if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) { + if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) { return; } @@ -1270,11 +1270,11 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { assert(PyMutex_IsLocked(&queue->mutex)); - process_queue(&queue->head, qsbr, false, cb, state); + process_queue(&queue->head, tstate, false, cb, state); int more_work = !llist_empty(&queue->head); _Py_atomic_store_int_relaxed(&queue->has_work, more_work); @@ -1282,7 +1282,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, static void maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { if (!_Py_atomic_load_int_relaxed(&queue->has_work)) { @@ -1291,7 +1291,7 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_interp_queue(queue, qsbr, cb, state); + process_interp_queue(queue, tstate, cb, state); PyMutex_Unlock(&queue->mutex); } } @@ -1303,10 +1303,10 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL); } void @@ -1316,10 +1316,10 @@ _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, voi _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state); } void @@ -1348,7 +1348,7 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) // Process the merged queue now (see gh-130794). _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); + process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL); PyMutex_Unlock(&interp->mem_free_queue.mutex); From 7ca6d79fa32d7203ee8d64f47b6b3539a027fdea Mon Sep 17 00:00:00 2001 From: Wulian233 <1055917385@qq.com> Date: Wed, 28 May 2025 08:46:41 +0800 Subject: [PATCH 384/989] gh-134580: Modernizing `difflib.HtmlDiff` for HTML Output (#134581) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/whatsnew/3.15.rst | 7 +++ Lib/difflib.py | 50 +++++++++++++------ Lib/test/test_difflib.py | 6 +-- Lib/test/test_difflib_expect.html | 48 +++++++++++++----- ...-05-23-20-01-52.gh-issue-134580.xnaJ70.rst | 3 ++ 5 files changed, 84 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-23-20-01-52.gh-issue-134580.xnaJ70.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index cd4b2e8b3dd8ed..4d1a27354fc620 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -89,6 +89,13 @@ New modules Improved modules ================ +difflib +------- + +* Improved the styling of HTML diff pages generated by the :class:`difflib.HtmlDiff` + class, and migrated the output to the HTML5 standard. + (Contributed by Jiahao Li in :gh:`134580`.) + ssl --- diff --git a/Lib/difflib.py b/Lib/difflib.py index f1f4e62514a7bd..18801a9b19eb9d 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1615,16 +1615,13 @@ def _line_pair_iterator(): _file_template = """ - - - - + + - - - @@ -1636,13 +1633,36 @@ def _line_pair_iterator(): _styles = """ :root {color-scheme: light dark} - table.diff {font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; border:medium} - .diff_header {background-color:#e0e0e0} - td.diff_header {text-align:right} - .diff_next {background-color:#c0c0c0} + table.diff { + font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; + border: medium; + } + .diff_header { + background-color: #e0e0e0; + font-weight: bold; + } + td.diff_header { + text-align: right; + padding: 0 8px; + } + .diff_next { + background-color: #c0c0c0; + padding: 4px 0; + } .diff_add {background-color:palegreen} .diff_chg {background-color:#ffff77} .diff_sub {background-color:#ffaaaa} + table.diff[summary="Legends"] { + margin-top: 20px; + border: 1px solid #ccc; + } + table.diff[summary="Legends"] th { + background-color: #e0e0e0; + padding: 4px 8px; + } + table.diff[summary="Legends"] td { + padding: 4px 8px; + } @media (prefers-color-scheme: dark) { .diff_header {background-color:#666} @@ -1650,6 +1670,8 @@ def _line_pair_iterator(): .diff_add {background-color:darkgreen} .diff_chg {background-color:#847415} .diff_sub {background-color:darkred} + table.diff[summary="Legends"] {border-color:#555} + table.diff[summary="Legends"] th{background-color:#666} }""" _table_template = """ @@ -1692,7 +1714,7 @@ class HtmlDiff(object): make_table -- generates HTML for a single side by side table make_file -- generates complete HTML file with a single side by side table - See tools/scripts/diff.py for an example usage of this class. + See Doc/includes/diff.py for an example usage of this class. """ _file_template = _file_template diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 9e217249be7332..6ac584a08d1e86 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -255,21 +255,21 @@ def test_make_file_default_charset(self): html_diff = difflib.HtmlDiff() output = html_diff.make_file(patch914575_from1.splitlines(), patch914575_to1.splitlines()) - self.assertIn('content="text/html; charset=utf-8"', output) + self.assertIn('charset="utf-8"', output) def test_make_file_iso88591_charset(self): html_diff = difflib.HtmlDiff() output = html_diff.make_file(patch914575_from1.splitlines(), patch914575_to1.splitlines(), charset='iso-8859-1') - self.assertIn('content="text/html; charset=iso-8859-1"', output) + self.assertIn('charset="iso-8859-1"', output) def test_make_file_usascii_charset_with_nonascii_input(self): html_diff = difflib.HtmlDiff() output = html_diff.make_file(patch914575_nonascii_from1.splitlines(), patch914575_nonascii_to1.splitlines(), charset='us-ascii') - self.assertIn('content="text/html; charset=us-ascii"', output) + self.assertIn('charset="us-ascii"', output) self.assertIn('ımplıcıt', output) class TestDiffer(unittest.TestCase): diff --git a/Lib/test/test_difflib_expect.html b/Lib/test/test_difflib_expect.html index 9f33a9e9c9cf58..2346a6f9f8dddf 100644 --- a/Lib/test/test_difflib_expect.html +++ b/Lib/test/test_difflib_expect.html @@ -1,22 +1,42 @@ - - - - + + - - - diff --git a/Misc/NEWS.d/next/Library/2025-05-23-20-01-52.gh-issue-134580.xnaJ70.rst b/Misc/NEWS.d/next/Library/2025-05-23-20-01-52.gh-issue-134580.xnaJ70.rst new file mode 100644 index 00000000000000..979d310d3cee58 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-23-20-01-52.gh-issue-134580.xnaJ70.rst @@ -0,0 +1,3 @@ +Improved the styling of HTML diff pages generated by the +:class:`difflib.HtmlDiff` class, and migrated the output to the HTML5 +standard. From fbbbc10055e0ee0011cf683132be706815480839 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 27 May 2025 18:27:41 -0700 Subject: [PATCH 385/989] gh-127266: avoid data races when updating type slots (gh-133177) In the free-threaded build, avoid data races caused by updating type slots or type flags after the type was initially created. For those (typically rare) cases, use the stop-the-world mechanism. Remove the use of atomics when reading or writing type flags. --- Include/internal/pycore_interp_structs.h | 3 + Include/internal/pycore_object.h | 2 +- Include/internal/pycore_typeobject.h | 1 - Include/object.h | 12 +- Include/refcount.h | 3 - Lib/test/test_descr.py | 28 + ...-03-14-13-08-20.gh-issue-127266._tyfBp.rst | 6 + Objects/typeobject.c | 630 ++++++++++++++---- Python/ceval.c | 14 + Tools/tsan/suppressions_free_threading.txt | 7 - 10 files changed, 574 insertions(+), 132 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 8a29c533b99058..f25f5847b3b307 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -677,8 +677,11 @@ struct _Py_interp_cached_objects { /* object.__reduce__ */ PyObject *objreduce; +#ifndef Py_GIL_DISABLED + /* resolve_slotdups() */ PyObject *type_slots_pname; pytype_slotdef *type_slots_ptrs[MAX_EQUIV]; +#endif /* TypeVar and related types */ PyTypeObject *generic_type; diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 3aaee7d008155a..3fc1e7d7dcbd15 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -313,7 +313,7 @@ extern int _PyDict_CheckConsistency(PyObject *mp, int check_content); // Fast inlined version of PyType_HasFeature() static inline int _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { - return ((FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags) & feature) != 0); + return ((type->tp_flags) & feature) != 0; } extern void _PyType_InitCache(PyInterpreterState *interp); diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 1a4f89fd2449a0..0ee7d555c56cdd 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -134,7 +134,6 @@ extern int _PyType_AddMethod(PyTypeObject *, PyMethodDef *); extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags); -extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); PyTypeObject *_PyType_LookupByVersion(unsigned int version); diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..994cac1ad17501 100644 --- a/Include/object.h +++ b/Include/object.h @@ -620,6 +620,12 @@ given type object has a specified feature. #define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0) #define Py_TPFLAGS_HAVE_VERSION_TAG (1UL << 18) +// Flag values for ob_flags (16 bits available, if SIZEOF_VOID_P > 4). +#define _Py_IMMORTAL_FLAGS (1 << 0) +#define _Py_STATICALLY_ALLOCATED_FLAG (1 << 2) +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) +#define _Py_TYPE_REVEALED_FLAG (1 << 3) +#endif #define Py_CONSTANT_NONE 0 #define Py_CONSTANT_FALSE 1 @@ -776,11 +782,7 @@ PyType_HasFeature(PyTypeObject *type, unsigned long feature) // PyTypeObject is opaque in the limited C API flags = PyType_GetFlags(type); #else -# ifdef Py_GIL_DISABLED - flags = _Py_atomic_load_ulong_relaxed(&type->tp_flags); -# else - flags = type->tp_flags; -# endif + flags = type->tp_flags; #endif return ((flags & feature) != 0); } diff --git a/Include/refcount.h b/Include/refcount.h index 177bbdaf0c5977..ebd1dba6d15e1a 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -19,9 +19,6 @@ immortal. The latter should be the only instances that require cleanup during runtime finalization. */ -#define _Py_STATICALLY_ALLOCATED_FLAG 4 -#define _Py_IMMORTAL_FLAGS 1 - #if SIZEOF_VOID_P > 4 /* In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index ea076ba4fef2db..f6ec2cf5ce8b5c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4114,6 +4114,34 @@ class E(D): else: self.fail("shouldn't be able to create inheritance cycles") + def test_assign_bases_many_subclasses(self): + # This is intended to check that typeobject.c:queue_slot_update() can + # handle updating many subclasses when a slot method is re-assigned. + class A: + x = 'hello' + def __call__(self): + return 123 + def __getitem__(self, index): + return None + + class X: + x = 'bye' + + class B(A): + pass + + subclasses = [] + for i in range(1000): + sc = type(f'Sub{i}', (B,), {}) + subclasses.append(sc) + + self.assertEqual(subclasses[0]()(), 123) + self.assertEqual(subclasses[0]().x, 'hello') + B.__bases__ = (X,) + with self.assertRaises(TypeError): + subclasses[0]()() + self.assertEqual(subclasses[0]().x, 'bye') + def test_builtin_bases(self): # Make sure all the builtin types can have their base queried without # segfaulting. See issue #5787. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst new file mode 100644 index 00000000000000..b26977628de136 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst @@ -0,0 +1,6 @@ +In the free-threaded build, avoid data races caused by updating type slots +or type flags after the type was initially created. For those (typically +rare) cases, use the stop-the-world mechanism. Remove the use of atomics +when reading or writing type flags. The use of atomics is not sufficient to +avoid races (since flags are sometimes read without a lock and without +atomics) and are no longer required. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ee09289425b91a..07f01e3bcb229c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -48,7 +48,7 @@ class object "PyObject *" "&PyBaseObject_Type" & ((1 << MCACHE_SIZE_EXP) - 1)) #define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag), \ + MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ ((Py_ssize_t)(name)) >> 3) #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ @@ -60,11 +60,19 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED -// There's a global lock for mutation of types. This avoids having to take -// additional locks while doing various subclass processing which may result -// in odd behaviors w.r.t. running with the GIL as the outer type lock could -// be released and reacquired during a subclass update if there's contention -// on the subclass lock. +// There's a global lock for types that ensures that tp_version_tag and +// _spec_cache are correctly updated if the type is modified. It also protects +// tp_mro, tp_bases, and tp_base. This avoids having to take additional locks +// while doing various subclass processing which may result in odd behaviors +// w.r.t. running with the GIL as the outer type lock could be released and +// reacquired during a subclass update if there's contention on the subclass +// lock. +// +// Note that this lock does not protect updates of other type slots or the +// tp_flags member. Instead, we either ensure those updates are done before +// the type has been revealed to other threads or we only do those updates +// while the stop-the-world mechanism is active. The slots and flags are read +// in many places without holding a lock and without atomics. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex #define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) #define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() @@ -74,8 +82,100 @@ class object "PyObject *" "&PyBaseObject_Type" #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +#ifdef Py_DEBUG +// Return true if the world is currently stopped. +static bool +types_world_is_stopped(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return interp->stoptheworld.world_stopped; +} +#endif + +// Checks that the type has not yet been revealed (exposed) to other +// threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might be +// revealed. We only have ob_flags on 64-bit platforms. +#if SIZEOF_VOID_P > 4 +#define TYPE_IS_REVEALED(tp) ((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) != 0) +#else +#define TYPE_IS_REVEALED(tp) 0 +#endif + +#ifdef Py_DEBUG #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } + +// Checks if we can safely update type slots or tp_flags. +#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) \ + assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) + +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) \ + if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); } +#else +#define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) +#endif + +static void +types_stop_world(void) +{ + assert(!types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + assert(types_world_is_stopped()); +} + +static void +types_start_world(void) +{ + assert(types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StartTheWorld(interp); + assert(!types_world_is_stopped()); +} + +// This is used to temporarily prevent the TYPE_LOCK from being suspended +// when held by the topmost critical section. +static void +type_lock_prevent_release(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t *tagptr = &tstate->critical_section; + PyCriticalSection *c = (PyCriticalSection *)(*tagptr & ~_Py_CRITICAL_SECTION_MASK); + if (!(*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + assert(c->_cs_mutex == TYPE_LOCK); + c->_cs_mutex = NULL; + } + else { + PyCriticalSection2 *c2 = (PyCriticalSection2 *)c; + if (c->_cs_mutex == TYPE_LOCK) { + c->_cs_mutex = c2->_cs_mutex2; + c2->_cs_mutex2 = NULL; + } else { + assert(c2->_cs_mutex2 == TYPE_LOCK); + c2->_cs_mutex2 = NULL; + } + } +} + +static void +type_lock_allow_release(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t *tagptr = &tstate->critical_section; + PyCriticalSection *c = (PyCriticalSection *)(*tagptr & ~_Py_CRITICAL_SECTION_MASK); + if (!(*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + assert(c->_cs_mutex == NULL); + c->_cs_mutex = TYPE_LOCK; + } + else { + PyCriticalSection2 *c2 = (PyCriticalSection2 *)c; + assert(c2->_cs_mutex2 == NULL); + c2->_cs_mutex2 = TYPE_LOCK; + } +} #else @@ -84,6 +184,12 @@ class object "PyObject *" "&PyBaseObject_Type" #define BEGIN_TYPE_DICT_LOCK(d) #define END_TYPE_DICT_LOCK() #define ASSERT_TYPE_LOCK_HELD() +#define TYPE_IS_REVEALED(tp) 0 +#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) +#define types_world_is_stopped() 1 +#define types_stop_world() +#define types_start_world() #endif @@ -106,6 +212,9 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static int slot_tp_setattro(PyObject *self, PyObject *name, PyObject *value); +static PyObject * +slot_tp_call(PyObject *self, PyObject *args, PyObject *kwds); + static inline PyTypeObject * type_from_ref(PyObject *ref) { @@ -346,21 +455,14 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { - if (tp->tp_flags & Py_TPFLAGS_READY) { - // It's possible the type object has been exposed to other threads - // if it's been marked ready. In that case, the type lock should be - // held when flags are modified. - ASSERT_TYPE_LOCK_HELD(); - } - // Since PyType_HasFeature() reads the flags without holding the type - // lock, we need an atomic store here. - FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); + ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp); + tp->tp_flags = flags; } static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -498,6 +600,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_Check(bases)); + ASSERT_NEW_TYPE_OR_LOCKED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -542,7 +645,7 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(self); return self->tp_mro; } @@ -1027,7 +1130,6 @@ PyType_Unwatch(int watcher_id, PyObject* obj) static void set_version_unlocked(PyTypeObject *tp, unsigned int version) { - ASSERT_TYPE_LOCK_HELD(); assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED)); #ifndef Py_GIL_DISABLED PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1075,7 +1177,12 @@ type_modified_unlocked(PyTypeObject *type) We don't assign new version tags eagerly, but only as needed. */ - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); +#ifdef Py_GIL_DISABLED + // This function is re-entrant and it's not safe to call it + // with the world stopped. + assert(!types_world_is_stopped()); +#endif if (type->tp_version_tag == 0) { return; } @@ -1106,6 +1213,8 @@ type_modified_unlocked(PyTypeObject *type) while (bits) { assert(i < TYPE_MAX_WATCHERS); if (bits & 1) { + // Note that PyErr_FormatUnraisable is potentially re-entrant + // and the watcher callback might be too. PyType_WatchCallback cb = interp->type_watchers[i]; if (cb && (cb(type) < 0)) { PyErr_FormatUnraisable( @@ -1245,14 +1354,6 @@ _PyType_LookupByVersion(unsigned int version) #endif } -unsigned int -_PyType_GetVersionForCurrentState(PyTypeObject *tp) -{ - return tp->tp_version_tag; -} - - - #define MAX_VERSIONS_PER_CLASS 1000 #if _Py_ATTR_CACHE_UNUSED < MAX_VERSIONS_PER_CLASS #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" @@ -1586,10 +1687,13 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) BEGIN_TYPE_LOCK(); type_modified_unlocked(type); + types_stop_world(); if (abstract) type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); else type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); END_TYPE_LOCK(); return 0; @@ -1624,15 +1728,15 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) return mro; } -static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, PyObject **); +static PyTypeObject *find_best_base(PyObject *); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); static int add_all_subclasses(PyTypeObject *type, PyObject *bases); static void remove_subclass(PyTypeObject *, PyTypeObject *); static void remove_all_subclasses(PyTypeObject *type, PyObject *bases); -static void update_all_slots(PyTypeObject *); +static int update_all_slots(PyTypeObject *); typedef int (*update_callback)(PyTypeObject *, void *); static int update_subclasses(PyTypeObject *type, PyObject *attr_name, @@ -1640,13 +1744,15 @@ static int update_subclasses(PyTypeObject *type, PyObject *attr_name, static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); +// Compute tp_mro for this type and all of its subclasses. This +// is called after __bases__ is assigned to an existing type. static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); PyObject *old_mro; - int res = mro_internal(type, &old_mro); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1708,9 +1814,9 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) +type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base) { - // Check arguments + // Check arguments, this is re-entrant due to the PySys_Audit() call if (!check_set_special_type_attr(type, new_bases, "__bases__")) { return -1; } @@ -1759,20 +1865,29 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) } // Compute the new MRO and the new base class - PyTypeObject *new_base = best_base(new_bases); - if (new_base == NULL) + *best_base = find_best_base(new_bases); + if (*best_base == NULL) return -1; - if (!compatible_for_assignment(type->tp_base, new_base, "__bases__")) { + if (!compatible_for_assignment(type->tp_base, *best_base, "__bases__")) { return -1; } + return 0; +} + +static int +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +{ + ASSERT_TYPE_LOCK_HELD(); + + Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; set_tp_bases(type, Py_NewRef(new_bases), 0); - type->tp_base = (PyTypeObject *)Py_NewRef(new_base); + type->tp_base = (PyTypeObject *)Py_NewRef(best_base); PyObject *temp = PyList_New(0); if (temp == NULL) { @@ -1796,7 +1911,11 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) add to all new_bases */ remove_all_subclasses(type, old_bases); res = add_all_subclasses(type, new_bases); - update_all_slots(type); + if (update_all_slots(type) < 0) { + goto bail; + } + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ + type_modified_unlocked(type); } else { res = 0; @@ -1827,13 +1946,13 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) bail: if (lookup_tp_bases(type) == new_bases) { - assert(type->tp_base == new_base); + assert(type->tp_base == best_base); set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); - Py_DECREF(new_base); + Py_DECREF(best_base); } else { Py_DECREF(old_bases); @@ -1848,9 +1967,13 @@ static int type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); + PyTypeObject *best_base; int res; BEGIN_TYPE_LOCK(); - res = type_set_bases_unlocked(type, new_bases); + res = type_check_new_bases(type, new_bases, &best_base); + if (res == 0) { + res = type_set_bases_unlocked(type, new_bases, best_base); + } END_TYPE_LOCK(); return res; } @@ -3078,6 +3201,7 @@ static PyObject * class_name(PyObject *cls) { PyObject *name; + // Note that this is potentially re-entrant. if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) { name = PyObject_Repr(cls); } @@ -3414,9 +3538,13 @@ mro_invoke(PyTypeObject *type) const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { + // Custom mro() method on metaclass. This is potentially re-entrant. + // We are called either from type_ready() or from type_set_bases(). mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); } else { + // In this case, the mro() method on the type object is being used and + // we know that these calls are not re-entrant. mro_result = mro_implementation_unlocked(type); } if (mro_result == NULL) @@ -3464,7 +3592,7 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); @@ -3512,21 +3640,11 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) return 1; } -static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) -{ - int res; - BEGIN_TYPE_LOCK(); - res = mro_internal_unlocked(type, 0, p_old_mro); - END_TYPE_LOCK(); - return res; -} - /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". */ static PyTypeObject * -best_base(PyObject *bases) +find_best_base(PyObject *bases) { Py_ssize_t i, n; PyTypeObject *base, *winner, *candidate; @@ -3608,13 +3726,167 @@ solid_base(PyTypeObject *type) } } +#ifdef Py_GIL_DISABLED + +// The structures and functions below are used in the free-threaded build +// to safely make updates to type slots, on type_setattro() for a slot +// or when __bases__ is re-assigned. Since the slots are read without atomic +// operations and without locking, we can only safely update them while the +// world is stopped. However, with the world stopped, we are very limited on +// which APIs can be safely used. For example, calling _PyObject_HashFast() +// or _PyDict_GetItemRef_KnownHash() are not safe and can potentially cause +// deadlocks. Hashing can be re-entrant and _PyDict_GetItemRef_KnownHash can +// acquire a lock if the dictionary is not owned by the current thread, to +// mark it shared on reading. +// +// We do the slot updates in two steps. First, with TYPE_LOCK held, we lookup +// the descriptor for each slot, for each subclass. We build a queue of +// updates to perform but don't actually update the type structures. After we +// are finished the lookups, we stop-the-world and apply all of the updates. +// The apply_slot_updates() code is simple and easy to confirm that it is +// safe. + +typedef struct { + PyTypeObject *type; + void **slot_ptr; + void *slot_value; +} slot_update_item_t; + +// The number of slot updates performed is based on the number of changed +// slots and the number of subclasses. It's possible there are many updates +// required if there are many subclasses (potentially an unbounded amount). +// Usually the number of slot updates is small, most often zero or one. When +// running the unit tests, we don't exceed 20. The chunk size is set to +// handle the common case with a single chunk and to not require too many +// chunk allocations if there are many subclasses. +#define SLOT_UPDATE_CHUNK_SIZE 30 + +typedef struct _slot_update { + struct _slot_update *prev; + Py_ssize_t n; + slot_update_item_t updates[SLOT_UPDATE_CHUNK_SIZE]; +} slot_update_chunk_t; + +// a queue of updates to be performed +typedef struct { + slot_update_chunk_t *head; +} slot_update_t; + +static slot_update_chunk_t * +slot_update_new_chunk(void) +{ + slot_update_chunk_t *chunk = PyMem_Malloc(sizeof(slot_update_chunk_t)); + if (chunk == NULL) { + PyErr_NoMemory(); + return NULL; + } + chunk->prev = NULL; + chunk->n = 0; + return chunk; +} + +static void +slot_update_free_chunks(slot_update_t *updates) +{ + slot_update_chunk_t *chunk = updates->head; + while (chunk != NULL) { + slot_update_chunk_t *prev = chunk->prev; + PyMem_Free(chunk); + chunk = prev; + } +} + +static int +queue_slot_update(slot_update_t *updates, PyTypeObject *type, + void **slot_ptr, void *slot_value) +{ + if (*slot_ptr == slot_value) { + return 0; // slot pointer not actually changed, don't queue update + } + if (updates->head == NULL || updates->head->n == SLOT_UPDATE_CHUNK_SIZE) { + slot_update_chunk_t *chunk = slot_update_new_chunk(); + if (chunk == NULL) { + return -1; // out-of-memory + } + chunk->prev = updates->head; + updates->head = chunk; + } + slot_update_item_t *item = &updates->head->updates[updates->head->n]; + item->type = type; + item->slot_ptr = slot_ptr; + item->slot_value = slot_value; + updates->head->n++; + assert(updates->head->n <= SLOT_UPDATE_CHUNK_SIZE); + return 0; +} + +static void +apply_slot_updates(slot_update_t *updates) +{ + assert(types_world_is_stopped()); + slot_update_chunk_t *chunk = updates->head; + while (chunk != NULL) { + for (Py_ssize_t i = 0; i < chunk->n; i++) { + slot_update_item_t *item = &chunk->updates[i]; + *(item->slot_ptr) = item->slot_value; + if (item->slot_value == slot_tp_call) { + /* A generic __call__ is incompatible with vectorcall */ + type_clear_flags(item->type, Py_TPFLAGS_HAVE_VECTORCALL); + } + } + chunk = chunk->prev; + } +} + +static void +apply_type_slot_updates(slot_update_t *updates) +{ + // This must be done carefully to avoid data races and deadlocks. We + // have just updated the type __dict__, while holding TYPE_LOCK. We have + // collected all of the required type slot updates into the 'updates' + // queue. Note that those updates can apply to multiple types since + // subclasses might also be affected by the dict change. + // + // We need to prevent other threads from writing to the dict before we can + // finish updating the slots. The actual stores to the slots are done + // with the world stopped. If we block on the stop-the-world mutex then + // we could release TYPE_LOCK mutex and potentially allow other threads + // to update the dict. That's because TYPE_LOCK was acquired using a + // critical section. + // + // The type_lock_prevent_release() call prevents the TYPE_LOCK mutex from + // being released even if we block on the STM mutex. We need to take care + // that we do not deadlock because of that. It is safe because we always + // acquire locks in the same order: first the TYPE_LOCK mutex and then the + // STM mutex. + type_lock_prevent_release(); + types_stop_world(); + apply_slot_updates(updates); + types_start_world(); + type_lock_allow_release(); +} + +#else + +// dummy definition, this parameter is only NULL in the default build +typedef void slot_update_t; + +#endif + +/// data passed to update_slots_callback() +typedef struct { + slot_update_t *queued_updates; + pytype_slotdef **defs; +} update_callback_data_t; + static void object_dealloc(PyObject *); static PyObject *object_new(PyTypeObject *, PyObject *, PyObject *); static int object_init(PyObject *, PyObject *, PyObject *); -static int update_slot(PyTypeObject *, PyObject *); +static int update_slot(PyTypeObject *, PyObject *, slot_update_t *update); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); +static bool has_slotdef(PyObject *); /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -3812,7 +4084,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds) unsigned long PyType_GetFlags(PyTypeObject *type) { - return FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags); + return type->tp_flags; } @@ -4590,6 +4862,10 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4 + // After this point, other threads can potentally use this type. + ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG; +#endif return (PyObject *)type; @@ -4652,7 +4928,7 @@ type_new_get_bases(type_new_ctx *ctx, PyObject **type) } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(ctx->bases); + PyTypeObject *base = find_best_base(ctx->bases); if (base == NULL) { return -1; } @@ -5067,12 +5343,12 @@ PyType_FromMetaclass( } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(bases); // borrowed ref + PyTypeObject *base = find_best_base(bases); // borrowed ref if (base == NULL) { goto finally; } - // best_base should check Py_TPFLAGS_BASETYPE & raise a proper exception, - // here we just check its work + // find_best_base() should check Py_TPFLAGS_BASETYPE & raise a proper + // exception, here we just check its work assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)); /* Calculate sizes */ @@ -5303,6 +5579,10 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4 + // After this point, other threads can potentally use this type. + ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG; +#endif finally: if (PyErr_Occurred()) { @@ -5598,8 +5878,6 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { - ASSERT_TYPE_LOCK_HELD(); - Py_hash_t hash = _PyObject_HashFast(name); if (hash == -1) { *error = -1; @@ -5908,9 +6186,13 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); - type_set_flags_with_mask(self, mask, flags); - END_TYPE_LOCK(); + unsigned long new_flags = (self->tp_flags & ~mask) | flags; + if (new_flags != self->tp_flags) { + types_stop_world(); + // can't use new_flags here since they could be out-of-date + self->tp_flags = (self->tp_flags & ~mask) | flags; + types_start_world(); + } } int @@ -5957,9 +6239,9 @@ set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_stop_world(); set_flags_recursive(self, mask, flags); - END_TYPE_LOCK(); + types_start_world(); } /* This is similar to PyObject_GenericGetAttr(), @@ -6073,6 +6355,8 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } +// Called by type_setattro(). Updates both the type dict and +// the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -6102,10 +6386,30 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } - if (is_dunder_name(name)) { - return update_slot(type, name); - } + return 0; +} + +static int +update_slot_after_setattr(PyTypeObject *type, PyObject *name) +{ +#ifdef Py_GIL_DISABLED + // stack allocate one chunk since that's all we need + assert(SLOT_UPDATE_CHUNK_SIZE >= MAX_EQUIV); + slot_update_chunk_t chunk = {0}; + slot_update_t queued_updates = {&chunk}; + if (update_slot(type, name, &queued_updates) < 0) { + return -1; + } + if (queued_updates.head->n > 0) { + apply_type_slot_updates(&queued_updates); + ASSERT_TYPE_LOCK_HELD(); + // should never allocate another chunk + assert(chunk.prev == NULL); + } +#else + update_slot(type, name, NULL); +#endif return 0; } @@ -6163,7 +6467,9 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { - // We don't just do PyType_Ready because we could already be readying + // This is an unlikely case. PyType_Ready has not yet been done and + // we need to initialize tp_dict. We don't just do PyType_Ready + // because we could already be readying. BEGIN_TYPE_LOCK(); dict = type->tp_dict; if (dict == NULL) { @@ -6179,6 +6485,12 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) BEGIN_TYPE_DICT_LOCK(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); assert(_PyType_CheckConsistency(type)); + if (res == 0) { + if (is_dunder_name(name) && has_slotdef(name)) { + // The name corresponds to a type slot. + res = update_slot_after_setattr(type, name); + } + } END_TYPE_DICT_LOCK(); done: @@ -7108,15 +7420,10 @@ object_set_class(PyObject *self, PyObject *value, void *closure) return -1; } -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif + types_stop_world(); PyTypeObject *oldto = Py_TYPE(self); int res = object_set_class_world_stopped(self, newto); -#ifdef Py_GIL_DISABLED - _PyEval_StartTheWorld(interp); -#endif + types_start_world(); if (res == 0) { if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) { Py_DECREF(oldto); @@ -8524,7 +8831,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -11047,12 +11354,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) { /* XXX Maybe this could be optimized more -- but is it worth it? */ +#ifdef Py_GIL_DISABLED + pytype_slotdef *ptrs[MAX_EQUIV]; + pytype_slotdef **pp = ptrs; + /* Collect all slotdefs that match name into ptrs. */ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) + *pp++ = p; + } + *pp = NULL; +#else /* pname and ptrs act as a little cache */ PyInterpreterState *interp = _PyInterpreterState_GET(); #define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname) #define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs) pytype_slotdef *p, **pp; - void **res, **ptr; if (pname != name) { /* Collect all slotdefs that match name into ptrs. */ @@ -11064,10 +11380,12 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) } *pp = NULL; } +#endif /* Look in all slots of the type matching the name. If exactly one of these has a filled-in slot, return a pointer to that slot. Otherwise, return NULL. */ + void **res, **ptr; res = NULL; for (pp = ptrs; *pp; pp++) { ptr = slotptr(type, (*pp)->offset); @@ -11077,11 +11395,25 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return NULL; res = ptr; } - return res; +#ifndef Py_GIL_DISABLED #undef pname #undef ptrs +#endif + return res; } +// Return true if "name" corresponds to at least one slot definition. This is +// a more accurate but more expensive test compared to is_dunder_name(). +static bool +has_slotdef(PyObject *name) +{ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) { + return true; + } + } + return false; +} /* Common code for update_slots_callback() and fixup_slot_dispatchers(). * @@ -11134,13 +11466,22 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) * There are some further special cases for specific slots, like supporting * __hash__ = None for tp_hash and special code for tp_new. * - * When done, return a pointer to the next slotdef with a different offset, - * because that's convenient for fixup_slot_dispatchers(). This function never - * sets an exception: if an internal error happens (unlikely), it's ignored. */ -static pytype_slotdef * -update_one_slot(PyTypeObject *type, pytype_slotdef *p) + * When done, next_p is set to the next slotdef with a different offset, + * because that's convenient for fixup_slot_dispatchers(). + * + * If the queued_updates pointer is provided, the actual updates to the slot + * pointers are queued, rather than being immediately performed. That argument + * is only used for the free-threaded build since those updates need to be + * done while the world is stopped. + * + * This function will only return an error if the queued_updates argument is + * provided and allocating memory for the queue fails. Other exceptions that + * occur internally are ignored, such as when looking up descriptors. */ +static int +update_one_slot(PyTypeObject *type, pytype_slotdef *p, pytype_slotdef **next_p, + slot_update_t *queued_updates) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11163,7 +11504,10 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) do { ++p; } while (p->offset == offset); - return p; + if (next_p != NULL) { + *next_p = p; + } + return 0; } /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); @@ -11246,16 +11590,41 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) } if (p->function == slot_tp_call) { /* A generic __call__ is incompatible with vectorcall */ - type_clear_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); + if (queued_updates == NULL) { + type_clear_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); + } } } Py_DECREF(descr); } while ((++p)->offset == offset); - if (specific && !use_generic) - *ptr = specific; - else - *ptr = generic; - return p; + + void *slot_value; + if (specific && !use_generic) { + slot_value = specific; + } else { + slot_value = generic; + } + +#ifdef Py_GIL_DISABLED + if (queued_updates != NULL) { + // queue the update to perform later, while world is stopped + if (queue_slot_update(queued_updates, type, ptr, slot_value) < 0) { + return -1; + } + } else { + // do the update to the type structure now + *ptr = slot_value; + } +#else + // always do the update immediately + assert(queued_updates == NULL); + *ptr = slot_value; +#endif + + if (next_p != NULL) { + *next_p = p; + } + return 0; } /* In the type, update the slots whose slotdefs are gathered in the pp array. @@ -11263,18 +11632,21 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); - pytype_slotdef **pp = (pytype_slotdef **)data; + update_callback_data_t *update_data = (update_callback_data_t *)data; + pytype_slotdef **pp = update_data->defs; for (; *pp; pp++) { - update_one_slot(type, *pp); + if (update_one_slot(type, *pp, NULL, update_data->queued_updates) < 0) { + return -1; + } } return 0; } /* Update the slots after assignment to a class (type) attribute. */ static int -update_slot(PyTypeObject *type, PyObject *name) +update_slot(PyTypeObject *type, PyObject *name, slot_update_t *queued_updates) { pytype_slotdef *ptrs[MAX_EQUIV]; pytype_slotdef *p; @@ -11305,8 +11677,12 @@ update_slot(PyTypeObject *type, PyObject *name) } if (ptrs[0] == NULL) return 0; /* Not an attribute that affects any slots */ + + update_callback_data_t callback_data; + callback_data.defs = ptrs; + callback_data.queued_updates = queued_updates; return update_subclasses(type, name, - update_slots_callback, (void *)ptrs); + update_slots_callback, (void *)&callback_data); } /* Store the proper functions in the slot dispatches at class (type) @@ -11315,35 +11691,56 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { - // This lock isn't strictly necessary because the type has not been - // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro - // where we'd like to assert that the type is locked. - BEGIN_TYPE_LOCK(); - assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { - p = update_one_slot(type, p); + update_one_slot(type, p, &p, NULL); } - - END_TYPE_LOCK(); } -static void +#ifdef Py_GIL_DISABLED + +// Called when __bases__ is re-assigned. +static int update_all_slots(PyTypeObject* type) { - pytype_slotdef *p; - - ASSERT_TYPE_LOCK_HELD(); + // Note that update_slot() can fail due to out-of-memory when allocating + // the queue chunks to hold the updates. That's unlikely since the number + // of updates is normally small but we handle that case. update_slot() + // can fail internally for other reasons (a lookup fails) but those + // errors are suppressed. + slot_update_t queued_updates = {0}; + for (pytype_slotdef *p = slotdefs; p->name; p++) { + if (update_slot(type, p->name_strobj, &queued_updates) < 0) { + if (queued_updates.head) { + slot_update_free_chunks(&queued_updates); + } + return -1; + } + } + if (queued_updates.head != NULL) { + apply_type_slot_updates(&queued_updates); + ASSERT_TYPE_LOCK_HELD(); + slot_update_free_chunks(&queued_updates); + } + return 0; +} - /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - type_modified_unlocked(type); +#else +// Called when __bases__ is re-assigned. +static int +update_all_slots(PyTypeObject* type) +{ + pytype_slotdef *p; for (p = slotdefs; p->name; p++) { - /* update_slot returns int but can't actually fail */ - update_slot(type, p->name_strobj); + /* update_slot returns int but can't actually fail in this case*/ + update_slot(type, p->name_strobj, NULL); } + return 0; } +#endif + PyObject * _PyType_GetSlotWrapperNames(void) @@ -11613,7 +12010,10 @@ PyType_Freeze(PyTypeObject *type) } BEGIN_TYPE_LOCK(); + types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); type_modified_unlocked(type); END_TYPE_LOCK(); diff --git a/Python/ceval.c b/Python/ceval.c index 309c4567f20e0e..4efa4b697fb59a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -139,6 +139,19 @@ #endif +static void +check_invalid_reentrancy(void) +{ +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + // In the free-threaded build, the interpreter must not be re-entered if + // the world-is-stopped. If so, that's a bug somewhere (quite likely in + // the painfully complex typeobject code). + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!interp->stoptheworld.world_stopped); +#endif +} + + #ifdef Py_DEBUG static void dump_item(_PyStackRef item) @@ -996,6 +1009,7 @@ PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate); + check_invalid_reentrancy(); CALL_STAT_INC(pyeval_calls); #if USE_COMPUTED_GOTOS && !Py_TAIL_CALL_INTERP diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 3230f969436c82..93421b623b92f9 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -12,15 +12,12 @@ # These warnings trigger directly in a CPython function. -race_top:assign_version_tag -race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -29,9 +26,6 @@ race_top:rangeiter_next # gh-129748: test.test_free_threading.test_slots.TestSlots.test_object race_top:mi_block_set_nextx -# gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -race_top:update_one_slot - # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create @@ -49,7 +43,6 @@ race:list_inplace_repeat_lock_held race:PyObject_Realloc # gh-133467. Some of these could be hard to trigger. -race_top:update_one_slot race_top:_Py_slot_tp_getattr_hook race_top:slot_tp_descr_get race_top:type_set_name From d7256ae4d781932b3b43b162e8425abdb134afa6 Mon Sep 17 00:00:00 2001 From: larryhastings Date: Wed, 28 May 2025 02:08:52 -0400 Subject: [PATCH 386/989] Fix typing.TYPE_CHECKING docs to reflect PEP 649. (#134813) typing.TYPE_CHECKING should no longer steer users towards manual or automatic stringization (and PEP 563); PEP 649 makes all that unnecessary. --- Doc/library/typing.rst | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index dd8ea3c364f49a..69df09c779592a 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3530,28 +3530,32 @@ Constant .. data:: TYPE_CHECKING A special constant that is assumed to be ``True`` by 3rd party static - type checkers. It is ``False`` at runtime. + type checkers. It's ``False`` at runtime. + + A module which is expensive to import, and which only contain types + used for typing annotations, can be safely imported inside an + ``if TYPE_CHECKING:`` block. This prevents the module from actually + being imported at runtime; annotations aren't eagerly evaluated + (see :pep:`649`) so using undefined symbols in annotations is + harmless--as long as you don't later examine them. + Your static type analysis tool will set ``TYPE_CHECKING`` to + ``True`` during static type analysis, which means the module will + be imported and the types will be checked properly during such analysis. Usage:: if TYPE_CHECKING: import expensive_mod - def fun(arg: 'expensive_mod.SomeType') -> None: + def fun(arg: expensive_mod.SomeType) -> None: local_var: expensive_mod.AnotherType = other_fun() - The first type annotation must be enclosed in quotes, making it a - "forward reference", to hide the ``expensive_mod`` reference from the - interpreter runtime. Type annotations for local variables are not - evaluated, so the second annotation does not need to be enclosed in quotes. - - .. note:: - - If ``from __future__ import annotations`` is used, - annotations are not evaluated at function definition time. - Instead, they are stored as strings in ``__annotations__``. - This makes it unnecessary to use quotes around the annotation - (see :pep:`563`). + If you occasionally need to examine type annotations at runtime + which may contain undefined symbols, use + :meth:`annotationlib.get_annotations` with a ``format`` parameter + of :attr:`annotationlib.Format.STRING` or + :attr:`annotationlib.Format.FORWARDREF` to safely retrieve the + annotations without raising :exc:`NameError`. .. versionadded:: 3.5.2 From 7be5916f6dc3db95744b5fec945327d82cce0183 Mon Sep 17 00:00:00 2001 From: Allen Hernandez Date: Wed, 28 May 2025 02:44:38 -0400 Subject: [PATCH 387/989] gh-134817: Document [Timed]RotatingFileHandler shouldRollover method (GH-134818) Co-authored-by: Allen Hernandez <2349718+AllenSH12@users.noreply.github.com> --- Doc/library/logging.handlers.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 63ef533e82c658..8f3aa1dfdd0cde 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -352,6 +352,10 @@ module, supports rotation of disk log files. Outputs the record to the file, catering for rollover as described previously. + .. method:: shouldRollover(record) + + See if the supplied record would cause the file to exceed the configured size limit. + .. _timed-rotating-file-handler: TimedRotatingFileHandler @@ -459,7 +463,11 @@ timed intervals. .. method:: getFilesToDelete() Returns a list of filenames which should be deleted as part of rollover. These - are the absolute paths of the oldest backup log files written by the handler. + + .. method:: shouldRollover(record) + + See if enough time has passed for a rollover to occur and if it has, compute + the next rollover time. .. _socket-handler: From 21672b694bd1786bad6eb71a9eff89291a91554b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 28 May 2025 10:30:04 +0300 Subject: [PATCH 388/989] Update outdated statement from `math` about C standard (#134621) Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com> --- Doc/library/math.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 394a462b9468e5..11d3b756e21322 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -10,8 +10,8 @@ -------------- -This module provides access to the mathematical functions defined by the C -standard. +This module provides access to common mathematical functions and constants, +including those defined by the C standard. These functions cannot be used with complex numbers; use the functions of the same name from the :mod:`cmath` module if you require support for complex From 469a56470b1d8df03862d8ceb73aebca14772dcf Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 28 May 2025 09:53:04 +0200 Subject: [PATCH 389/989] gh-134160: Block multiple module initialization (#134773) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/extending/extending.rst | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index 918c751b009761..fd63495674651b 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -204,17 +204,32 @@ value must be in a particular range or must satisfy other conditions, :c:data:`PyExc_ValueError` is appropriate. You can also define a new exception that is unique to your module. -For this, you can declare a static global object variable at the beginning -of the file:: +The simplest way to do this is to declare a static global object variable at +the beginning of the file:: - static PyObject *SpamError; + static PyObject *SpamError = NULL; -and initialize it with an exception object in the module's +and initialize it by calling :c:func:`PyErr_NewException` in the module's :c:data:`Py_mod_exec` function (:c:func:`!spam_module_exec`):: + SpamError = PyErr_NewException("spam.error", NULL, NULL); + +Since :c:data:`!SpamError` is a global variable, it will be overwitten every time +the module is reinitialized, when the :c:data:`Py_mod_exec` function is called. + +For now, let's avoid the issue: we will block repeated initialization by raising an +:py:exc:`ImportError`:: + + static PyObject *SpamError = NULL; + static int spam_module_exec(PyObject *m) { + if (SpamError != NULL) { + PyErr_SetString(PyExc_ImportError, + "cannot initialize spam module more than once"); + return -1; + } SpamError = PyErr_NewException("spam.error", NULL, NULL); if (PyModule_AddObjectRef(m, "SpamError", SpamError) < 0) { return -1; @@ -253,6 +268,11 @@ needed to ensure that it will not be discarded, causing :c:data:`!SpamError` to become a dangling pointer. Should it become a dangling pointer, C code which raises the exception could cause a core dump or other unintended side effects. +For now, the :c:func:`Py_DECREF` call to remove this reference is missing. +Even when the Python interpreter shuts down, the global :c:data:`!SpamError` +variable will not be garbage-collected. It will "leak". +We did, however, ensure that this will happen at most once per process. + We discuss the use of :c:macro:`PyMODINIT_FUNC` as a function return type later in this sample. From 0d499c7e938d58916648a0032f73aa38034b6a93 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 28 May 2025 15:24:40 +0200 Subject: [PATCH 390/989] gh-128629: Add _Py_PACK_VERSION for CPython's own definitions (GH-134247) Add _Py_PACK_VERSION for CPython's own definitions Py_PACK_VERSION was added to limited API in 3.14, so if Py_LIMITED_API is lower, the macro can't be used. Add a private version that can be used in CPython headers for checks like `Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 14)`. --- Include/pymacro.h | 5 +++-- Python/modsupport.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Include/pymacro.h b/Include/pymacro.h index 218987a80b0d91..d410645034d848 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -231,12 +231,13 @@ // "comparison of unsigned expression in '< 0' is always false". #define _Py_IS_TYPE_SIGNED(type) ((type)(-1) <= 0) -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000 // 3.14 // Version helpers. These are primarily macros, but have exported equivalents. +#define _Py_PACK_VERSION(X, Y) _Py_PACK_FULL_VERSION(X, Y, 0, 0, 0) +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 14) PyAPI_FUNC(uint32_t) Py_PACK_FULL_VERSION(int x, int y, int z, int level, int serial); PyAPI_FUNC(uint32_t) Py_PACK_VERSION(int x, int y); #define Py_PACK_FULL_VERSION _Py_PACK_FULL_VERSION -#define Py_PACK_VERSION(X, Y) Py_PACK_FULL_VERSION(X, Y, 0, 0, 0) +#define Py_PACK_VERSION _Py_PACK_VERSION #endif // Py_LIMITED_API < 3.14 diff --git a/Python/modsupport.c b/Python/modsupport.c index 2caf595949d52f..437ad412027e28 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -669,5 +669,5 @@ Py_PACK_FULL_VERSION(int x, int y, int z, int level, int serial) uint32_t Py_PACK_VERSION(int x, int y) { - return Py_PACK_FULL_VERSION(x, y, 0, 0, 0); + return _Py_PACK_VERSION(x, y); } From 11f7a939debd7731d1cb79ed84a473fa87c279c8 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 28 May 2025 15:45:08 +0100 Subject: [PATCH 391/989] gh-132983: Split ``_zstd_set_c_parameters`` (#133921) --- Lib/compression/zstd/_zstdfile.py | 3 +- Lib/test/test_zstd.py | 149 +++++++++++++++++-------- Modules/_zstd/_zstdmodule.c | 25 ++--- Modules/_zstd/_zstdmodule.h | 3 +- Modules/_zstd/compressor.c | 173 +++++++++++++++++------------- Modules/_zstd/decompressor.c | 32 +++--- 6 files changed, 228 insertions(+), 157 deletions(-) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py index 8770e576f509f4..d709f5efc658fa 100644 --- a/Lib/compression/zstd/_zstdfile.py +++ b/Lib/compression/zstd/_zstdfile.py @@ -1,7 +1,6 @@ import io from os import PathLike -from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdError, - ZSTD_DStreamOutSize) +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize from compression._common import _streams __all__ = ('ZstdFile', 'open') diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index bc809603cbc629..014634e450e449 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -64,6 +64,10 @@ SUPPORT_MULTITHREADING = False +C_INT_MIN = -(2**31) +C_INT_MAX = (2**31) - 1 + + def setUpModule(): global SUPPORT_MULTITHREADING SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) @@ -195,14 +199,21 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict=b"abcd1234") self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) - with self.assertRaises(ValueError): - ZstdCompressor(2**31) - with self.assertRaises(ValueError): - ZstdCompressor(options={2**31: 100}) + # valid range for compression level is [-(1<<17), 22] + msg = r'illegal compression level {}; the valid range is \[-?\d+, -?\d+\]' + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): + ZstdCompressor(C_INT_MAX) + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): + ZstdCompressor(C_INT_MIN) + msg = r'illegal compression level; the valid range is \[-?\d+, -?\d+\]' + with self.assertRaisesRegex(ValueError, msg): + ZstdCompressor(level=-(2**1000)) + with self.assertRaisesRegex(ValueError, msg): + ZstdCompressor(level=2**1000) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.window_log: 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={3333: 100}) # Method bad arguments @@ -253,18 +264,32 @@ def test_compress_parameters(self): } ZstdCompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() - d1[CompressionParameter.ldm_bucket_size_log] = 2**31 - self.assertRaises(ValueError, ZstdCompressor, options=d1) + # larger than signed int + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MAX + with self.assertRaises(ValueError): + ZstdCompressor(options=d1) + # smaller than signed int + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MIN + with self.assertRaises(ValueError): + ZstdCompressor(options=d1) - # clamp compressionLevel + # out of bounds compression level level_min, level_max = CompressionParameter.compression_level.bounds() - compress(b'', level_max+1) - compress(b'', level_min-1) - - compress(b'', options={CompressionParameter.compression_level:level_max+1}) - compress(b'', options={CompressionParameter.compression_level:level_min-1}) + with self.assertRaises(ValueError): + compress(b'', level_max+1) + with self.assertRaises(ValueError): + compress(b'', level_min-1) + with self.assertRaises(ValueError): + compress(b'', 2**1000) + with self.assertRaises(ValueError): + compress(b'', -(2**1000)) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_max+1}) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_min-1}) # zstd lib doesn't support MT compression if not SUPPORT_MULTITHREADING: @@ -277,19 +302,19 @@ def test_compress_parameters(self): # out of bounds error msg option = {CompressionParameter.window_log:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd compression parameter "window_log", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\((?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + "compression parameter 'window_log' received an illegal value 100; " + r'the valid range is \[-?\d+, -?\d+\]', + ): compress(b'', options=option) def test_unknown_compression_parameter(self): KEY = 100001234 option = {CompressionParameter.compression_level: 10, KEY: 200000000} - pattern = (r'Invalid zstd compression parameter.*?' - fr'"unknown parameter \(key {KEY}\)"') - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid compression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdCompressor(options=option) @unittest.skipIf(not SUPPORT_MULTITHREADING, @@ -384,12 +409,22 @@ def test_simple_decompress_bad_args(self): self.assertRaises(TypeError, ZstdDecompressor, options=b'abc') with self.assertRaises(ValueError): - ZstdDecompressor(options={2**31 : 100}) + ZstdDecompressor(options={C_INT_MAX: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={C_INT_MIN: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={0: C_INT_MAX}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={2**1000: 100}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={-(2**1000): 100}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={0: -(2**1000)}) - with self.assertRaises(ZstdError): - ZstdDecompressor(options={DecompressionParameter.window_log_max:100}) - with self.assertRaises(ZstdError): - ZstdDecompressor(options={3333 : 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={DecompressionParameter.window_log_max: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={3333: 100}) empty = compress(b'') lzd = ZstdDecompressor() @@ -402,26 +437,52 @@ def test_decompress_parameters(self): d = {DecompressionParameter.window_log_max : 15} ZstdDecompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() - d1[DecompressionParameter.window_log_max] = 2**31 - self.assertRaises(ValueError, ZstdDecompressor, None, d1) + # larger than signed int + d1[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + # smaller than signed int + d1[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + + d1[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): + ZstdDecompressor(None, d1) + d1[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): + ZstdDecompressor(None, d1) # out of bounds error msg options = {DecompressionParameter.window_log_max:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd decompression parameter "window_log_max", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\((?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + "decompression parameter 'window_log_max' received an illegal value 100; " + r'the valid range is \[-?\d+, -?\d+\]', + ): + decompress(b'', options=options) + + # out of bounds deecompression parameter + options[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(OverflowError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(OverflowError): decompress(b'', options=options) def test_unknown_decompression_parameter(self): KEY = 100001234 options = {DecompressionParameter.window_log_max: DecompressionParameter.window_log_max.bounds()[1], KEY: 200000000} - pattern = (r'Invalid zstd decompression parameter.*?' - fr'"unknown parameter \(key {KEY}\)"') - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid decompression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdDecompressor(options=options) def test_decompress_epilogue_flags(self): @@ -1424,11 +1485,11 @@ def test_init_bad_mode(self): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "rw") with self.assertRaisesRegex(TypeError, - r"NOT be a CompressionParameter"): + r"not be a CompressionParameter"): ZstdFile(io.BytesIO(), 'rb', options={CompressionParameter.compression_level:5}) with self.assertRaisesRegex(TypeError, - r"NOT be a DecompressionParameter"): + r"not be a DecompressionParameter"): ZstdFile(io.BytesIO(), 'wb', options={DecompressionParameter.window_log_max:21}) @@ -1439,19 +1500,19 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): ZstdFile(io.BytesIO(), "w", level='asd') # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={999:9999}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={CompressionParameter.window_log:99}) with self.assertRaises(TypeError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "r", options=33) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={DecompressionParameter.window_log_max:2**31}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={444:333}) @@ -1467,7 +1528,7 @@ def test_init_close_fp(self): tmp_f.write(DAT_130K_C) filename = tmp_f.name - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ZstdFile(filename, options={'a':'b'}) # for PyPy diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 56ad999e5cd4e4..5ad697d2b83dd6 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -103,16 +103,13 @@ static const ParameterInfo dp_list[] = { }; void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) +set_parameter_error(int is_compress, int key_v, int value_v) { ParameterInfo const *list; int list_size; - char const *name; char *type; ZSTD_bounds bounds; - int i; - char pos_msg[128]; + char pos_msg[64]; if (is_compress) { list = cp_list; @@ -126,8 +123,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { + char const *name = NULL; + for (int i = 0; i < list_size; i++) { if (key_v == (list+i)->parameter) { name = (list+i)->parameter_name; break; @@ -149,20 +146,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress, bounds = ZSTD_dParam_getBounds(key_v); } if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Invalid zstd %s parameter \"%s\".", + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", type, name); return; } /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. " - "(%d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - 8*(int)sizeof(Py_ssize_t)); + PyErr_Format(PyExc_ValueError, + "%s parameter '%s' received an illegal value %d; " + "the valid range is [%d, %d]", + type, name, value_v, bounds.lowerBound, bounds.upperBound); } static inline _zstd_state* diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index b36486442c6567..1f4160f474f0b0 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -49,7 +49,6 @@ set_zstd_error(const _zstd_state* const state, const error_type type, size_t zstd_ret); extern void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v); +set_parameter_error(int is_compress, int key_v, int value_v); #endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 7f0558909b4422..0fc3d7d36c68fe 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -49,98 +49,106 @@ typedef struct { #include "clinic/compressor.c.h" static int -_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char* arg_type) +_zstd_set_c_level(ZstdCompressor *self, int level) { - size_t zstd_ret; - _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { + /* Set integer compression level */ + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "illegal compression level %d; the valid range is [%d, %d]", + level, min_level, max_level); return -1; } - /* Integer compression level */ - if (PyLong_Check(level_or_options)) { - int level = PyLong_AsInt(level_or_options); - if (level == -1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between " - "%d and %d.", ZSTD_minCLevel(), ZSTD_maxCLevel()); + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; + + /* Set compressionLevel to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { return -1; } + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; +} - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdCompressor() argument 'options' must be dict, not %T", + options); + return -1; + } - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); return -1; } - return 0; - } - /* Options dict */ - if (PyDict_Check(level_or_options)) { - PyObject *key, *value; - Py_ssize_t pos = 0; + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); + return -1; + } - while (PyDict_Next(level_or_options, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == mod_state->DParameter_type) { - PyErr_SetString(PyExc_TypeError, - "Key of compression options dict should " - "NOT be a DecompressionParameter attribute."); - return -1; - } + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } - int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be either a " - "CompressionParameter attribute or an int."); + if (key_v == ZSTD_c_compressionLevel) { + if (_zstd_set_c_level(self, value_v) < 0) { return -1; } - - int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); - return -1; + continue; + } + if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; } + } - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } - else if (key_v == ZSTD_c_nbWorkers) { - /* From the zstd library docs: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); - return -1; - } + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(1, key_v, value_v); + return -1; } - return 0; } - PyErr_Format(PyExc_TypeError, - "Invalid type for %s. Expected %s", arg_name, arg_type); - return -1; + return 0; } static void @@ -354,20 +362,35 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, self->last_mode = ZSTD_e_end; if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, + PyErr_SetString(PyExc_TypeError, "Only one of level or options should be used."); goto error; } - /* Set compressLevel/options to compression context */ + /* Set compression level */ if (level != Py_None) { - if (_zstd_set_c_parameters(self, level, "level", "int") < 0) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "illegal compression level; the valid range is [%d, %d]", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { goto error; } } + /* Set options dictionary */ if (options != Py_None) { - if (_zstd_set_c_parameters(self, options, "options", "dict") < 0) { + if (_zstd_set_c_parameters(self, options) < 0) { goto error; } } diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 015cb774ed2d76..26e568cf433308 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -86,56 +86,52 @@ _get_DDict(ZstdDict *self) return self->d_dict; } -/* Set decompression parameters to decompression context */ static int _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) { - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } if (!PyDict_Check(options)) { - PyErr_SetString(PyExc_TypeError, - "options argument should be dict object."); + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); return -1; } - pos = 0; + Py_ssize_t pos = 0; + PyObject *key, *value; while (PyDict_Next(options, &pos, &key, &value)) { /* Check key type */ if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of decompression options dict should " - "NOT be a CompressionParameter attribute."); + "compression options dictionary key must not be a " + "CompressionParameter attribute"); return -1; } - /* Both key & value should be 32-bit signed int */ + Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be either a " - "DecompressionParameter attribute or an int."); return -1; } int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); return -1; } /* Set parameter to compression context */ - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 0, key_v, value_v); + set_parameter_error(0, key_v, value_v); return -1; } } @@ -577,7 +573,7 @@ _zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, self->dict = zstd_dict; } - /* Set option to decompression context */ + /* Set options dictionary */ if (options != Py_None) { if (_zstd_set_d_parameters(self, options) < 0) { goto error; From 91618278e7cd1ffc222efa03bb5334bd5a13dc5b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 28 May 2025 17:19:50 +0200 Subject: [PATCH 392/989] gh-133711: Fix test_regrtest for PYTHONUTF8=1 (#134839) Use "backslashreplace" error handler to decode stdout and stderr. Example: vstinner@WIN C:\victor\python\main\build\test_python_worker_8360\x91> "C:\victor\python\main\PCbuild\amd64\python_d.exe" -m test --fast-ci --slow-ci --testdir C:\Users\vstinner\AppData\Local\Temp\tmp0t59e8da test_regrtest_noop1 test_regrtest_noop2 test_regrtest_noop3 test_regrtest_noop4 Notice the "\x91" byte at the end of the first line: it's the non-ASCII U+00E6 character encoded to the OEM cp437 code page. --- Lib/test/test_regrtest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 7e317d5ab943ff..8f4fc09442e083 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -874,7 +874,10 @@ def test_script_autotest(self): self.run_tests(args) def run_batch(self, *args): - proc = self.run_command(args) + proc = self.run_command(args, + # gh-133711: cmd.exe uses the OEM code page + # to display the non-ASCII current directory + errors="backslashreplace") self.check_output(proc.stdout) @unittest.skipUnless(sysconfig.is_python_build(), From d83576bf48d07d5e29d5d171c4e25afb048622aa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 28 May 2025 18:24:24 +0300 Subject: [PATCH 393/989] gh-128840: Fix parsing long IPv6 addresses with embedded IPv4 address (#134836) --- Lib/ipaddress.py | 10 ++++++---- Lib/test/test_ipaddress.py | 19 +++++++++++++++++-- ...-05-28-15-53-27.gh-issue-128840.Nur2pB.rst | 1 + 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-28-15-53-27.gh-issue-128840.Nur2pB.rst diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 69e933a6541f78..8b60b9d5c9cd51 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -1660,10 +1660,12 @@ def _ip_int_from_string(cls, ip_str): """ if not ip_str: raise AddressValueError('Address cannot be empty') - if len(ip_str) > 39: - msg = ("At most 39 characters expected in " - f"{ip_str[:14]!r}({len(ip_str)-28} chars elided){ip_str[-14:]!r}") - raise AddressValueError(msg) + if len(ip_str) > 45: + shorten = ip_str + if len(shorten) > 100: + shorten = f'{ip_str[:45]}({len(ip_str)-90} chars elided){ip_str[-45:]}' + raise AddressValueError(f"At most 45 characters expected in " + f"{shorten!r}") # We want to allow more parts than the max to be 'split' # to preserve the correct error message when there are diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index ee95454e64bb0d..db1c38243e2268 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -399,14 +399,16 @@ def assertBadSplit(addr): def test_bad_address_split_v6_too_long(self): def assertBadSplit(addr): - msg = r"At most 39 characters expected in %s" - with self.assertAddressError(msg, repr(re.escape(addr[:14]))): + msg = r"At most 45 characters expected in '%s" + with self.assertAddressError(msg, re.escape(addr[:45])): ipaddress.IPv6Address(addr) # Long IPv6 address long_addr = ("0:" * 10000) + "0" assertBadSplit(long_addr) assertBadSplit(long_addr + "%zoneid") + assertBadSplit(long_addr + ":255.255.255.255") + assertBadSplit(long_addr + ":ffff:255.255.255.255") def test_bad_address_split_v6_too_many_parts(self): def assertBadSplit(addr): @@ -2189,6 +2191,11 @@ def testIPv6AddressTooLarge(self): self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255'), + ipaddress.ip_address('::ffff:c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255'), + ipaddress.ip_address('ffff::c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('::FFFF:192.0.2.1%scope'), ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1%scope'), @@ -2201,6 +2208,10 @@ def testIPv6AddressTooLarge(self): ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertNotEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201%scope')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255%scope'), + ipaddress.ip_address('::ffff:c0a8:ffff%scope')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255%scope'), + ipaddress.ip_address('ffff::c0a8:ffff%scope')) def testIPVersion(self): self.assertEqual(ipaddress.IPv4Address.version, 4) @@ -2610,6 +2621,10 @@ def testCompressIPv6Address(self): '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128', '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128', '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128', + '0000:0000:0000:0000:0000:0000:255.255.255.255': '::ffff:ffff/128', + '0000:0000:0000:0000:0000:ffff:255.255.255.255': '::ffff:255.255.255.255/128', + 'ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255': + 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128', } for uncompressed, compressed in list(test_addresses.items()): self.assertEqual(compressed, str(ipaddress.IPv6Interface( diff --git a/Misc/NEWS.d/next/Library/2025-05-28-15-53-27.gh-issue-128840.Nur2pB.rst b/Misc/NEWS.d/next/Library/2025-05-28-15-53-27.gh-issue-128840.Nur2pB.rst new file mode 100644 index 00000000000000..faff433aa4b86e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-28-15-53-27.gh-issue-128840.Nur2pB.rst @@ -0,0 +1 @@ +Fix parsing long IPv6 addresses with embedded IPv4 address. From d9ec0ee733d6e0ae824581bae9db8f4d5314e2c7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 28 May 2025 17:41:11 +0200 Subject: [PATCH 394/989] gh-133711: Log Windows OEM code page in test.pythoninfo (#134840) Add _winapi.GetOEMCP() function. --- Lib/test/pythoninfo.py | 13 ++++++++++--- Modules/_winapi.c | 14 ++++++++++++++ Modules/clinic/_winapi.c.h | 20 +++++++++++++++++++- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index e1830f2e6eba97..80a262c18a5ad2 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -920,10 +920,17 @@ def collect_windows(info_add): try: import _winapi - dll_path = _winapi.GetModuleFileName(sys.dllhandle) - info_add('windows.dll_path', dll_path) - except (ImportError, AttributeError): + except ImportError: pass + else: + try: + dll_path = _winapi.GetModuleFileName(sys.dllhandle) + info_add('windows.dll_path', dll_path) + except AttributeError: + pass + + call_func(info_add, 'windows.ansi_code_page', _winapi, 'GetACP') + call_func(info_add, 'windows.oem_code_page', _winapi, 'GetOEMCP') # windows.version_caption: "wmic os get Caption,Version /value" command import subprocess diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 044505fab6216c..b4cfbebcb1bb5e 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -2747,6 +2747,19 @@ _winapi_GetACP_impl(PyObject *module) return PyLong_FromUnsignedLong(GetACP()); } +/*[clinic input] +_winapi.GetOEMCP + +Get the current Windows ANSI code page identifier. +[clinic start generated code]*/ + +static PyObject * +_winapi_GetOEMCP_impl(PyObject *module) +/*[clinic end generated code: output=4def5b07a8be1b3b input=e8caf4353a28e28e]*/ +{ + return PyLong_FromUnsignedLong(GetOEMCP()); +} + /*[clinic input] _winapi.GetFileType -> DWORD @@ -3007,6 +3020,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_WAITFORSINGLEOBJECT_METHODDEF _WINAPI_WRITEFILE_METHODDEF _WINAPI_GETACP_METHODDEF + _WINAPI_GETOEMCP_METHODDEF _WINAPI_GETFILETYPE_METHODDEF _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF _WINAPI_NEEDCURRENTDIRECTORYFOREXEPATH_METHODDEF diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index b0fc1f1a89b760..bd685e75d9344f 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -1933,6 +1933,24 @@ _winapi_GetACP(PyObject *module, PyObject *Py_UNUSED(ignored)) return _winapi_GetACP_impl(module); } +PyDoc_STRVAR(_winapi_GetOEMCP__doc__, +"GetOEMCP($module, /)\n" +"--\n" +"\n" +"Get the current Windows ANSI code page identifier."); + +#define _WINAPI_GETOEMCP_METHODDEF \ + {"GetOEMCP", (PyCFunction)_winapi_GetOEMCP, METH_NOARGS, _winapi_GetOEMCP__doc__}, + +static PyObject * +_winapi_GetOEMCP_impl(PyObject *module); + +static PyObject * +_winapi_GetOEMCP(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _winapi_GetOEMCP_impl(module); +} + PyDoc_STRVAR(_winapi_GetFileType__doc__, "GetFileType($module, /, handle)\n" "--\n" @@ -2169,4 +2187,4 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO #ifndef _WINAPI_GETSHORTPATHNAME_METHODDEF #define _WINAPI_GETSHORTPATHNAME_METHODDEF #endif /* !defined(_WINAPI_GETSHORTPATHNAME_METHODDEF) */ -/*[clinic end generated code: output=ede63eaaf63aa7e6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4581fd481c3c6293 input=a9049054013a1b77]*/ From 4635115c3f1495fa20e553937df37861fffa7054 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 28 May 2025 17:43:52 +0200 Subject: [PATCH 395/989] gh-133711: Fix test_readline.test_nonascii() for UTF-8 Mode (#134841) Skip the test if the Python UTF-8 Mode is enabled and the LC_CTYPE encoding is not UTF-8. --- Lib/test/test_readline.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index b9d082b3597f13..45192fe508270d 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -1,6 +1,7 @@ """ Very minimal unittests for parts of the readline module. """ +import codecs import locale import os import sys @@ -231,6 +232,13 @@ def test_nonascii(self): # writing and reading non-ASCII bytes into/from a TTY works, but # readline or ncurses ignores non-ASCII bytes on read. self.skipTest(f"the LC_CTYPE locale is {loc!r}") + if sys.flags.utf8_mode: + encoding = locale.getencoding() + encoding = codecs.lookup(encoding).name # normalize the name + if encoding != "utf-8": + # gh-133711: The Python UTF-8 Mode ignores the LC_CTYPE locale + # and always use the UTF-8 encoding. + self.skipTest(f"the LC_CTYPE encoding is {encoding!r}") try: readline.add_history("\xEB\xEF") From b265a7ddeb12b2040d80b471d447ce4c3ff4bb95 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 28 May 2025 17:30:53 +0100 Subject: [PATCH 396/989] GH-134848: Use a set to store ``AuditEvents.sources`` (#134849) --- Doc/tools/extensions/audit_events.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/tools/extensions/audit_events.py b/Doc/tools/extensions/audit_events.py index 23d82c0f4414bf..385a58b2145446 100644 --- a/Doc/tools/extensions/audit_events.py +++ b/Doc/tools/extensions/audit_events.py @@ -13,7 +13,7 @@ from sphinx.util.docutils import SphinxDirective if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterator, Set from sphinx.application import Sphinx from sphinx.builders import Builder @@ -33,7 +33,7 @@ class AuditEvents: def __init__(self) -> None: self.events: dict[str, list[str]] = {} - self.sources: dict[str, list[tuple[str, str]]] = {} + self.sources: dict[str, set[tuple[str, str]]] = {} def __iter__(self) -> Iterator[tuple[str, list[str], tuple[str, str]]]: for name, args in self.events.items(): @@ -47,7 +47,7 @@ def add_event( self._check_args_match(name, args) else: self.events[name] = args - self.sources.setdefault(name, []).append(source) + self.sources.setdefault(name, set()).add(source) def _check_args_match(self, name: str, args: list[str]) -> None: current_args = self.events[name] @@ -69,11 +69,11 @@ def _check_args_match(self, name: str, args: list[str]) -> None: return def id_for(self, name) -> str: - source_count = len(self.sources.get(name, ())) + source_count = len(self.sources.get(name, set())) name_clean = re.sub(r"\W", "_", name) return f"audit_event_{name_clean}_{source_count}" - def rows(self) -> Iterator[tuple[str, list[str], list[tuple[str, str]]]]: + def rows(self) -> Iterator[tuple[str, list[str], Set[tuple[str, str]]]]: for name in sorted(self.events.keys()): yield name, self.events[name], self.sources[name] @@ -218,7 +218,7 @@ def _make_row( docname: str, name: str, args: list[str], - sources: list[tuple[str, str]], + sources: Set[tuple[str, str]], ) -> nodes.row: row = nodes.row() name_node = nodes.paragraph("", nodes.Text(name)) @@ -233,7 +233,7 @@ def _make_row( row += nodes.entry("", args_node) backlinks_node = nodes.paragraph() - backlinks = enumerate(sorted(set(sources)), start=1) + backlinks = enumerate(sorted(sources), start=1) for i, (doc, label) in backlinks: if isinstance(label, str): ref = nodes.reference("", f"[{i}]", internal=True) @@ -258,7 +258,7 @@ def setup(app: Sphinx): app.connect("env-purge-doc", audit_events_purge) app.connect("env-merge-info", audit_events_merge) return { - "version": "1.0", + "version": "2.0", "parallel_read_safe": True, "parallel_write_safe": True, } From bac3fcba5b2d83aa294267a456ccc36d86151dd4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 28 May 2025 20:11:09 +0300 Subject: [PATCH 397/989] gh-108512: Add and use new replacements for PySys_GetObject() (GH-111035) Add functions PySys_GetAttr(), PySys_GetAttrString(), PySys_GetOptionalAttr() and PySys_GetOptionalAttrString(). --- Doc/c-api/init_config.rst | 2 +- Doc/c-api/sys.rst | 51 ++++++++++++- Doc/data/stable_abi.dat | 4 + Doc/whatsnew/3.15.rst | 5 +- Include/internal/pycore_sysmodule.h | 5 -- Include/sysmodule.h | 6 ++ Lib/test/test_capi/test_sys.py | 64 +++++++++++++++- Lib/test/test_stable_abi_ctypes.py | 4 + ...-10-18-14-36-35.gh-issue-108512.fMZLfr.rst | 2 + Misc/stable_abi.toml | 8 ++ Modules/_cursesmodule.c | 3 +- Modules/_lsprof.c | 6 +- Modules/_pickle.c | 2 +- Modules/_testlimitedcapi/sys.c | 73 +++++++++++++++++++ Modules/_threadmodule.c | 3 +- Modules/_tkinter.c | 5 +- Modules/faulthandler.c | 3 +- Modules/main.c | 13 ++-- Modules/syslogmodule.c | 3 +- Objects/moduleobject.c | 3 +- PC/python3dll.c | 4 + Python/_warnings.c | 3 +- Python/bltinmodule.c | 11 ++- Python/ceval.c | 2 +- Python/errors.c | 5 +- Python/import.c | 12 +-- Python/initconfig.c | 8 +- Python/intrinsics.c | 3 +- Python/pylifecycle.c | 8 +- Python/pythonrun.c | 18 ++--- Python/sysmodule.c | 36 ++++----- Python/traceback.c | 5 +- 32 files changed, 287 insertions(+), 93 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2023-10-18-14-36-35.gh-issue-108512.fMZLfr.rst diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index e1931655618b1c..4fd10224262488 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -2111,7 +2111,7 @@ initialization:: /* Specify sys.path explicitly */ /* If you want to modify the default set of paths, finish - initialization first and then use PySys_GetObject("path") */ + initialization first and then use PySys_GetAttrString("path") */ config.module_search_paths_set = 1; status = PyWideStringList_Append(&config.module_search_paths, L"/path/to/stdlib"); diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index b3c89800e386ff..b34936dd55e94c 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -258,10 +258,57 @@ These are utility functions that make functionality from the :mod:`sys` module accessible to C code. They all work with the current interpreter thread's :mod:`sys` module's dict, which is contained in the internal thread state structure. +.. c:function:: PyObject *PySys_GetAttr(PyObject *name) + + Get the attribute *name* of the :mod:`sys` module. + Return a :term:`strong reference`. + Raise :exc:`RuntimeError` and return ``NULL`` if it does not exist or + if the :mod:`sys` module cannot be found. + + If the non-existing object should not be treated as a failure, you can use + :c:func:`PySys_GetOptionalAttr` instead. + + .. versionadded:: next + +.. c:function:: PyObject *PySys_GetAttrString(const char *name) + + This is the same as :c:func:`PySys_GetAttr`, but *name* is + specified as a :c:expr:`const char*` UTF-8 encoded bytes string, + rather than a :c:expr:`PyObject*`. + + If the non-existing object should not be treated as a failure, you can use + :c:func:`PySys_GetOptionalAttrString` instead. + + .. versionadded:: next + +.. c:function:: int PySys_GetOptionalAttr(PyObject *name, PyObject **result) + + Variant of :c:func:`PySys_GetAttr` which doesn't raise + exception if the object does not exist. + + * Set *\*result* to a new :term:`strong reference` to the object and + return ``1`` if the object exists. + * Set *\*result* to ``NULL`` and return ``0`` without setting an exception + if the object does not exist. + * Set an exception, set *\*result* to ``NULL``, and return ``-1``, + if an error occurred. + + .. versionadded:: next + +.. c:function:: int PySys_GetOptionalAttrString(const char *name, PyObject **result) + + This is the same as :c:func:`PySys_GetOptionalAttr`, but *name* is + specified as a :c:expr:`const char*` UTF-8 encoded bytes string, + rather than a :c:expr:`PyObject*`. + + .. versionadded:: next + .. c:function:: PyObject *PySys_GetObject(const char *name) - Return the object *name* from the :mod:`sys` module or ``NULL`` if it does - not exist, without setting an exception. + Similar to :c:func:`PySys_GetAttrString`, but return a :term:`borrowed + reference` and return ``NULL`` *without* setting exception on failure. + + Preserves exception that was set before the call. .. c:function:: int PySys_SetObject(const char *name, PyObject *v) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index e71a40e55e918c..0d0dfb3843260e 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -628,7 +628,11 @@ func,PySys_Audit,3.13,, func,PySys_AuditTuple,3.13,, func,PySys_FormatStderr,3.2,, func,PySys_FormatStdout,3.2,, +func,PySys_GetAttr,3.15,, +func,PySys_GetAttrString,3.15,, func,PySys_GetObject,3.2,, +func,PySys_GetOptionalAttr,3.15,, +func,PySys_GetOptionalAttrString,3.15,, func,PySys_GetXOptions,3.7,, func,PySys_ResetWarnOptions,3.2,, func,PySys_SetArgv,3.2,, diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 4d1a27354fc620..87cca4eeff385a 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -213,7 +213,10 @@ C API changes New features ------------ -* TODO +* Add :c:func:`PySys_GetAttr`, :c:func:`PySys_GetAttrString`, + :c:func:`PySys_GetOptionalAttr`, and :c:func:`PySys_GetOptionalAttrString` + functions as replacements for :c:func:`PySys_GetObject`. + (Contributed by Serhiy Storchaka in :gh:`108512`.) Porting to Python 3.15 ---------------------- diff --git a/Include/internal/pycore_sysmodule.h b/Include/internal/pycore_sysmodule.h index 008a2da0d04fa7..347b0a7a790c06 100644 --- a/Include/internal/pycore_sysmodule.h +++ b/Include/internal/pycore_sysmodule.h @@ -8,11 +8,6 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -PyAPI_FUNC(int) _PySys_GetOptionalAttr(PyObject *, PyObject **); -PyAPI_FUNC(int) _PySys_GetOptionalAttrString(const char *, PyObject **); -PyAPI_FUNC(PyObject *) _PySys_GetRequiredAttr(PyObject *); -PyAPI_FUNC(PyObject *) _PySys_GetRequiredAttrString(const char *); - // Export for '_pickle' shared extension PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); diff --git a/Include/sysmodule.h b/Include/sysmodule.h index c1d5f610fe08a5..2f362791797ded 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -4,6 +4,12 @@ extern "C" { #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030f0000 +PyAPI_FUNC(PyObject *) PySys_GetAttr(PyObject *); +PyAPI_FUNC(PyObject *) PySys_GetAttrString(const char *); +PyAPI_FUNC(int) PySys_GetOptionalAttr(PyObject *, PyObject **); +PyAPI_FUNC(int) PySys_GetOptionalAttrString(const char *, PyObject **); +#endif PyAPI_FUNC(PyObject *) PySys_GetObject(const char *); PyAPI_FUNC(int) PySys_SetObject(const char *, PyObject *); diff --git a/Lib/test/test_capi/test_sys.py b/Lib/test/test_capi/test_sys.py index d3a9b378e7769a..3793ce2461effd 100644 --- a/Lib/test/test_capi/test_sys.py +++ b/Lib/test/test_capi/test_sys.py @@ -19,6 +19,68 @@ class CAPITest(unittest.TestCase): maxDiff = None + @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') + def test_sys_getattr(self): + # Test PySys_GetAttr() + sys_getattr = _testlimitedcapi.sys_getattr + + self.assertIs(sys_getattr('stdout'), sys.stdout) + with support.swap_attr(sys, '\U0001f40d', 42): + self.assertEqual(sys_getattr('\U0001f40d'), 42) + + with self.assertRaisesRegex(RuntimeError, r'lost sys\.nonexistent'): + sys_getattr('nonexistent') + with self.assertRaisesRegex(RuntimeError, r'lost sys\.\U0001f40d'): + sys_getattr('\U0001f40d') + self.assertRaises(TypeError, sys_getattr, 1) + self.assertRaises(TypeError, sys_getattr, []) + # CRASHES sys_getattr(NULL) + + @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') + def test_sys_getattrstring(self): + # Test PySys_GetAttrString() + getattrstring = _testlimitedcapi.sys_getattrstring + + self.assertIs(getattrstring(b'stdout'), sys.stdout) + with support.swap_attr(sys, '\U0001f40d', 42): + self.assertEqual(getattrstring('\U0001f40d'.encode()), 42) + + with self.assertRaisesRegex(RuntimeError, r'lost sys\.nonexistent'): + getattrstring(b'nonexistent') + with self.assertRaisesRegex(RuntimeError, r'lost sys\.\U0001f40d'): + getattrstring('\U0001f40d'.encode()) + self.assertRaises(UnicodeDecodeError, getattrstring, b'\xff') + # CRASHES getattrstring(NULL) + + @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') + def test_sys_getoptionalattr(self): + # Test PySys_GetOptionalAttr() + getoptionalattr = _testlimitedcapi.sys_getoptionalattr + + self.assertIs(getoptionalattr('stdout'), sys.stdout) + with support.swap_attr(sys, '\U0001f40d', 42): + self.assertEqual(getoptionalattr('\U0001f40d'), 42) + + self.assertIs(getoptionalattr('nonexistent'), AttributeError) + self.assertIs(getoptionalattr('\U0001f40d'), AttributeError) + self.assertRaises(TypeError, getoptionalattr, 1) + self.assertRaises(TypeError, getoptionalattr, []) + # CRASHES getoptionalattr(NULL) + + @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') + def test_sys_getoptionalattrstring(self): + # Test PySys_GetOptionalAttrString() + getoptionalattrstring = _testlimitedcapi.sys_getoptionalattrstring + + self.assertIs(getoptionalattrstring(b'stdout'), sys.stdout) + with support.swap_attr(sys, '\U0001f40d', 42): + self.assertEqual(getoptionalattrstring('\U0001f40d'.encode()), 42) + + self.assertIs(getoptionalattrstring(b'nonexistent'), AttributeError) + self.assertIs(getoptionalattrstring('\U0001f40d'.encode()), AttributeError) + self.assertRaises(UnicodeDecodeError, getoptionalattrstring, b'\xff') + # CRASHES getoptionalattrstring(NULL) + @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_sys_getobject(self): @@ -29,7 +91,7 @@ def test_sys_getobject(self): with support.swap_attr(sys, '\U0001f40d', 42): self.assertEqual(getobject('\U0001f40d'.encode()), 42) - self.assertIs(getobject(b'nonexisting'), AttributeError) + self.assertIs(getobject(b'nonexistent'), AttributeError) with support.catch_unraisable_exception() as cm: self.assertIs(getobject(b'\xff'), AttributeError) self.assertEqual(cm.unraisable.exc_type, UnicodeDecodeError) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 1e6f69d49e9335..5a6ba9de337904 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -658,7 +658,11 @@ def test_windows_feature_macros(self): "PySys_AuditTuple", "PySys_FormatStderr", "PySys_FormatStdout", + "PySys_GetAttr", + "PySys_GetAttrString", "PySys_GetObject", + "PySys_GetOptionalAttr", + "PySys_GetOptionalAttrString", "PySys_GetXOptions", "PySys_HasWarnOptions", "PySys_ResetWarnOptions", diff --git a/Misc/NEWS.d/next/C_API/2023-10-18-14-36-35.gh-issue-108512.fMZLfr.rst b/Misc/NEWS.d/next/C_API/2023-10-18-14-36-35.gh-issue-108512.fMZLfr.rst new file mode 100644 index 00000000000000..279e588f3adcb7 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2023-10-18-14-36-35.gh-issue-108512.fMZLfr.rst @@ -0,0 +1,2 @@ +Add functions :c:func:`PySys_GetAttr`, :c:func:`PySys_GetAttrString`, +:c:func:`PySys_GetOptionalAttr` and :c:func:`PySys_GetOptionalAttrString`. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index d38919a8ea9b62..1f323cc03973e5 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2575,3 +2575,11 @@ added = '3.14' [function.Py_PACK_VERSION] added = '3.14' +[function.PySys_GetAttr] + added = '3.15' +[function.PySys_GetAttrString] + added = '3.15' +[function.PySys_GetOptionalAttr] + added = '3.15' +[function.PySys_GetOptionalAttrString] + added = '3.15' diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 290ae4e55cd7a7..d7788ef7a585b0 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -108,7 +108,6 @@ static const char PyCursesVersion[] = "2.2"; #include "pycore_capsule.h" // _PyCapsule_SetTraverse() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_structseq.h" // _PyStructSequence_NewType() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_fileutils.h" // _Py_set_inheritable #ifdef __hpux @@ -3847,7 +3846,7 @@ _curses_setupterm_impl(PyObject *module, const char *term, int fd) if (fd == -1) { PyObject* sys_stdout; - if (_PySys_GetOptionalAttrString("stdout", &sys_stdout) < 0) { + if (PySys_GetOptionalAttrString("stdout", &sys_stdout) < 0) { return NULL; } diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 626c176715bdac..bbad5eb69032da 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -782,7 +782,7 @@ _lsprof_Profiler_enable_impl(ProfilerObject *self, int subcalls, return NULL; } - PyObject* monitoring = PyImport_ImportModuleAttrString("sys", "monitoring"); + PyObject* monitoring = PySys_GetAttrString("monitoring"); if (!monitoring) { return NULL; } @@ -864,7 +864,7 @@ _lsprof_Profiler_disable_impl(ProfilerObject *self) } if (self->flags & POF_ENABLED) { PyObject* result = NULL; - PyObject* monitoring = PyImport_ImportModuleAttrString("sys", "monitoring"); + PyObject* monitoring = PySys_GetAttrString("monitoring"); if (!monitoring) { return NULL; @@ -983,7 +983,7 @@ profiler_init_impl(ProfilerObject *self, PyObject *timer, double timeunit, Py_XSETREF(self->externalTimer, Py_XNewRef(timer)); self->tool_id = PY_MONITORING_PROFILER_ID; - PyObject* monitoring = PyImport_ImportModuleAttrString("sys", "monitoring"); + PyObject* monitoring = PySys_GetAttrString("monitoring"); if (!monitoring) { return -1; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 29ef0cb0c2e088..86d8b38620cb7f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1915,7 +1915,7 @@ whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject * __module__ can be None. If it is so, then search sys.modules for the module of global. */ Py_CLEAR(module_name); - modules = _PySys_GetRequiredAttr(&_Py_ID(modules)); + modules = PySys_GetAttr(&_Py_ID(modules)); if (modules == NULL) { return NULL; } diff --git a/Modules/_testlimitedcapi/sys.c b/Modules/_testlimitedcapi/sys.c index 7d8b7a8569e515..cec7f8ab612019 100644 --- a/Modules/_testlimitedcapi/sys.c +++ b/Modules/_testlimitedcapi/sys.c @@ -1,7 +1,76 @@ +#include "pyconfig.h" // Py_GIL_DISABLED +// Need limited C API version 3.15 for PySys_GetAttr() etc +#if !defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API) +# define Py_LIMITED_API 0x030f0000 +#endif #include "parts.h" #include "util.h" +static PyObject * +sys_getattr(PyObject *Py_UNUSED(module), PyObject *name) +{ + NULLABLE(name); + return PySys_GetAttr(name); +} + +static PyObject * +sys_getattrstring(PyObject *Py_UNUSED(module), PyObject *arg) +{ + const char *name; + Py_ssize_t size; + if (!PyArg_Parse(arg, "z#", &name, &size)) { + return NULL; + } + return PySys_GetAttrString(name); +} + +static PyObject * +sys_getoptionalattr(PyObject *Py_UNUSED(module), PyObject *name) +{ + PyObject *value = UNINITIALIZED_PTR; + NULLABLE(name); + + switch (PySys_GetOptionalAttr(name, &value)) { + case -1: + assert(value == NULL); + assert(PyErr_Occurred()); + return NULL; + case 0: + assert(value == NULL); + return Py_NewRef(PyExc_AttributeError); + case 1: + return value; + default: + Py_FatalError("PySys_GetOptionalAttr() returned invalid code"); + } +} + +static PyObject * +sys_getoptionalattrstring(PyObject *Py_UNUSED(module), PyObject *arg) +{ + PyObject *value = UNINITIALIZED_PTR; + const char *name; + Py_ssize_t size; + if (!PyArg_Parse(arg, "z#", &name, &size)) { + return NULL; + } + + switch (PySys_GetOptionalAttrString(name, &value)) { + case -1: + assert(value == NULL); + assert(PyErr_Occurred()); + return NULL; + case 0: + assert(value == NULL); + return Py_NewRef(PyExc_AttributeError); + case 1: + return value; + default: + Py_FatalError("PySys_GetOptionalAttrString() returned invalid code"); + } +} + static PyObject * sys_getobject(PyObject *Py_UNUSED(module), PyObject *arg) { @@ -39,6 +108,10 @@ sys_getxoptions(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(ignored)) static PyMethodDef test_methods[] = { + {"sys_getattr", sys_getattr, METH_O}, + {"sys_getattrstring", sys_getattrstring, METH_O}, + {"sys_getoptionalattr", sys_getoptionalattr, METH_O}, + {"sys_getoptionalattrstring", sys_getoptionalattrstring, METH_O}, {"sys_getobject", sys_getobject, METH_O}, {"sys_setobject", sys_setobject, METH_VARARGS}, {"sys_getxoptions", sys_getxoptions, METH_NOARGS}, diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index cc83be4b5ff311..150a266b521736 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -10,7 +10,6 @@ #include "pycore_object_deferred.h" // _PyObject_SetDeferredRefcount() #include "pycore_pylifecycle.h" #include "pycore_pystate.h" // _PyThreadState_SetCurrent() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_time.h" // _PyTime_FromSeconds() #include "pycore_weakref.h" // _PyWeakref_GET_REF() @@ -2290,7 +2289,7 @@ thread_excepthook(PyObject *module, PyObject *args) PyObject *thread = PyStructSequence_GET_ITEM(args, 3); PyObject *file; - if (_PySys_GetOptionalAttr( &_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr( &_Py_ID(stderr), &file) < 0) { return NULL; } if (file == NULL || file == Py_None) { diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 77695401919cb7..875840bd6a6364 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -31,7 +31,6 @@ Copyright (C) 1994 Steen Lumholt. #endif #include "pycore_long.h" // _PyLong_IsNegative() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_unicodeobject.h" // _PyUnicode_AsUTF8String #ifdef MS_WINDOWS @@ -146,7 +145,7 @@ _get_tcl_lib_path(void) int stat_return_value; PyObject *prefix; - (void) _PySys_GetOptionalAttrString("base_prefix", &prefix); + (void) PySys_GetOptionalAttrString("base_prefix", &prefix); if (prefix == NULL) { return NULL; } @@ -3547,7 +3546,7 @@ PyInit__tkinter(void) /* This helps the dynamic loader; in Unicode aware Tcl versions it also helps Tcl find its encodings. */ - (void) _PySys_GetOptionalAttrString("executable", &uexe); + (void) PySys_GetOptionalAttrString("executable", &uexe); if (uexe && PyUnicode_Check(uexe)) { // sys.executable can be None cexe = PyUnicode_EncodeFSDefault(uexe); Py_DECREF(uexe); diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c index c94f4f66366170..73bea8172c7253 100644 --- a/Modules/faulthandler.c +++ b/Modules/faulthandler.c @@ -6,7 +6,6 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime.h" // _Py_ID() #include "pycore_signal.h" // Py_NSIG -#include "pycore_sysmodule.h" // _PySys_GetRequiredAttr() #include "pycore_time.h" // _PyTime_FromSecondsObject() #include "pycore_traceback.h" // _Py_DumpTracebackThreads #ifdef HAVE_UNISTD_H @@ -98,7 +97,7 @@ faulthandler_get_fileno(PyObject **file_ptr) PyObject *file = *file_ptr; if (file == NULL || file == Py_None) { - file = _PySys_GetRequiredAttr(&_Py_ID(stderr)); + file = PySys_GetAttr(&_Py_ID(stderr)); if (file == NULL) { return -1; } diff --git a/Modules/main.c b/Modules/main.c index 2d7ed25f5f9790..74e48c94732565 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -497,16 +497,13 @@ pymain_run_startup(PyConfig *config, int *exitcode) static int pymain_run_interactive_hook(int *exitcode) { - PyObject *hook = PyImport_ImportModuleAttrString("sys", - "__interactivehook__"); - if (hook == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - // no sys.__interactivehook__ attribute - PyErr_Clear(); - return 0; - } + PyObject *hook; + if (PySys_GetOptionalAttrString("__interactivehook__", &hook) < 0) { goto error; } + if (hook == NULL) { + return 0; + } if (PySys_Audit("cpython.run_interactivehook", "O", hook) < 0) { goto error; diff --git a/Modules/syslogmodule.c b/Modules/syslogmodule.c index 9c54af51402a27..ab20fff1509dfe 100644 --- a/Modules/syslogmodule.c +++ b/Modules/syslogmodule.c @@ -56,7 +56,6 @@ Revision history: #include "Python.h" #include "osdefs.h" // SEP -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include @@ -92,7 +91,7 @@ syslog_get_argv(void) Py_ssize_t slash; PyObject *argv; - if (_PySys_GetOptionalAttrString("argv", &argv) <= 0) { + if (PySys_GetOptionalAttrString("argv", &argv) <= 0) { return NULL; } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f363ef173cbd46..ba86b41e945e9d 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -12,7 +12,6 @@ #include "pycore_object.h" // _PyType_AllocNoTrack #include "pycore_pyerrors.h" // _PyErr_FormatFromCause() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() #include "osdefs.h" // MAXPATHLEN @@ -1058,7 +1057,7 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) int is_possibly_shadowing_stdlib = 0; if (is_possibly_shadowing) { PyObject *stdlib_modules; - if (_PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { + if (PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { goto done; } if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { diff --git a/PC/python3dll.c b/PC/python3dll.c index f0c578e11c643b..8ec791f8280f13 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -595,7 +595,11 @@ EXPORT_FUNC(PySys_Audit) EXPORT_FUNC(PySys_AuditTuple) EXPORT_FUNC(PySys_FormatStderr) EXPORT_FUNC(PySys_FormatStdout) +EXPORT_FUNC(PySys_GetAttr) +EXPORT_FUNC(PySys_GetAttrString) EXPORT_FUNC(PySys_GetObject) +EXPORT_FUNC(PySys_GetOptionalAttr) +EXPORT_FUNC(PySys_GetOptionalAttrString) EXPORT_FUNC(PySys_GetXOptions) EXPORT_FUNC(PySys_HasWarnOptions) EXPORT_FUNC(PySys_ResetWarnOptions) diff --git a/Python/_warnings.c b/Python/_warnings.c index 39bf1b225ccb0c..12e6172b0cf828 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -6,7 +6,6 @@ #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_pylifecycle.h" // _Py_IsInterpreterFinalizing() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_traceback.h" // _Py_DisplaySourceLine() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() @@ -678,7 +677,7 @@ show_warning(PyThreadState *tstate, PyObject *filename, int lineno, goto error; } - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &f_stderr) <= 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &f_stderr) <= 0) { fprintf(stderr, "lost sys.stderr\n"); goto error; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3d0295ee3883f2..e08c63924ca16d 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -14,7 +14,6 @@ #include "pycore_pyerrors.h" // _PyErr_NoMemory() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_pythonrun.h" // _Py_SourceAsString() -#include "pycore_sysmodule.h" // _PySys_GetRequiredAttr() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_cell.h" // PyCell_GetRef() @@ -465,7 +464,7 @@ builtin_callable(PyObject *module, PyObject *obj) static PyObject * builtin_breakpoint(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *keywords) { - PyObject *hook = _PySys_GetRequiredAttrString("breakpointhook"); + PyObject *hook = PySys_GetAttrString("breakpointhook"); if (hook == NULL) { return NULL; } @@ -2164,7 +2163,7 @@ builtin_print_impl(PyObject *module, PyObject * const *args, int i, err; if (file == Py_None) { - file = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + file = PySys_GetAttr(&_Py_ID(stdout)); if (file == NULL) { return NULL; } @@ -2270,7 +2269,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) int tty; /* Check that stdin/out/err are intact */ - fin = _PySys_GetRequiredAttr(&_Py_ID(stdin)); + fin = PySys_GetAttr(&_Py_ID(stdin)); if (fin == NULL) { goto error; } @@ -2278,7 +2277,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) PyErr_SetString(PyExc_RuntimeError, "lost sys.stdin"); goto error; } - fout = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + fout = PySys_GetAttr(&_Py_ID(stdout)); if (fout == NULL) { goto error; } @@ -2286,7 +2285,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); goto error; } - ferr = _PySys_GetRequiredAttr(&_Py_ID(stderr)); + ferr = PySys_GetAttr(&_Py_ID(stderr)); if (ferr == NULL) { goto error; } diff --git a/Python/ceval.c b/Python/ceval.c index 4efa4b697fb59a..7aec196cb85704 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2979,7 +2979,7 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) int is_possibly_shadowing_stdlib = 0; if (is_possibly_shadowing) { PyObject *stdlib_modules; - if (_PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { + if (PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { goto done; } if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { diff --git a/Python/errors.c b/Python/errors.c index 81f267b043afaf..a3122f76bdd87d 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -10,7 +10,6 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime.h" // _Py_ID() #include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_traceback.h" // _PyTraceBack_FromFrame() #include "pycore_unicodeobject.h" // _PyUnicode_Equal() @@ -1570,7 +1569,7 @@ write_unraisable_exc(PyThreadState *tstate, PyObject *exc_type, PyObject *obj) { PyObject *file; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { return -1; } if (file == NULL || file == Py_None) { @@ -1677,7 +1676,7 @@ format_unraisable_v(const char *format, va_list va, PyObject *obj) } PyObject *hook; - if (_PySys_GetOptionalAttr(&_Py_ID(unraisablehook), &hook) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(unraisablehook), &hook) < 0) { Py_DECREF(hook_args); err_msg_str = NULL; obj = NULL; diff --git a/Python/import.c b/Python/import.c index e7be1b90751a6c..98557991378e05 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3369,11 +3369,11 @@ PyObject * PyImport_GetImporter(PyObject *path) { PyThreadState *tstate = _PyThreadState_GET(); - PyObject *path_importer_cache = _PySys_GetRequiredAttrString("path_importer_cache"); + PyObject *path_importer_cache = PySys_GetAttrString("path_importer_cache"); if (path_importer_cache == NULL) { return NULL; } - PyObject *path_hooks = _PySys_GetRequiredAttrString("path_hooks"); + PyObject *path_hooks = PySys_GetAttrString("path_hooks"); if (path_hooks == NULL) { Py_DECREF(path_importer_cache); return NULL; @@ -3682,14 +3682,14 @@ import_find_and_load(PyThreadState *tstate, PyObject *abs_name) PyTime_t t1 = 0, accumulated_copy = accumulated; PyObject *sys_path, *sys_meta_path, *sys_path_hooks; - if (_PySys_GetOptionalAttrString("path", &sys_path) < 0) { + if (PySys_GetOptionalAttrString("path", &sys_path) < 0) { return NULL; } - if (_PySys_GetOptionalAttrString("meta_path", &sys_meta_path) < 0) { + if (PySys_GetOptionalAttrString("meta_path", &sys_meta_path) < 0) { Py_XDECREF(sys_path); return NULL; } - if (_PySys_GetOptionalAttrString("path_hooks", &sys_path_hooks) < 0) { + if (PySys_GetOptionalAttrString("path_hooks", &sys_path_hooks) < 0) { Py_XDECREF(sys_meta_path); Py_XDECREF(sys_path); return NULL; @@ -4127,7 +4127,7 @@ _PyImport_FiniCore(PyInterpreterState *interp) static int init_zipimport(PyThreadState *tstate, int verbose) { - PyObject *path_hooks = _PySys_GetRequiredAttrString("path_hooks"); + PyObject *path_hooks = PySys_GetAttrString("path_hooks"); if (path_hooks == NULL) { return -1; } diff --git a/Python/initconfig.c b/Python/initconfig.c index 25e30aa648e8aa..71d7cfed5c44c1 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3647,7 +3647,7 @@ _Py_DumpPathConfig(PyThreadState *tstate) #define DUMP_SYS(NAME) \ do { \ PySys_FormatStderr(" sys.%s = ", #NAME); \ - if (_PySys_GetOptionalAttrString(#NAME, &obj) < 0) { \ + if (PySys_GetOptionalAttrString(#NAME, &obj) < 0) { \ PyErr_Clear(); \ } \ if (obj != NULL) { \ @@ -3671,7 +3671,7 @@ _Py_DumpPathConfig(PyThreadState *tstate) #undef DUMP_SYS PyObject *sys_path; - (void) _PySys_GetOptionalAttrString("path", &sys_path); + (void) PySys_GetOptionalAttrString("path", &sys_path); if (sys_path != NULL && PyList_Check(sys_path)) { PySys_WriteStderr(" sys.path = [\n"); Py_ssize_t len = PyList_GET_SIZE(sys_path); @@ -4294,7 +4294,7 @@ _PyConfig_CreateXOptionsDict(const PyConfig *config) static int config_get_sys_write_bytecode(const PyConfig *config, int *value) { - PyObject *attr = _PySys_GetRequiredAttrString("dont_write_bytecode"); + PyObject *attr = PySys_GetAttrString("dont_write_bytecode"); if (attr == NULL) { return -1; } @@ -4315,7 +4315,7 @@ config_get(const PyConfig *config, const PyConfigSpec *spec, { if (use_sys) { if (spec->sys.attr != NULL) { - return _PySys_GetRequiredAttrString(spec->sys.attr); + return PySys_GetAttrString(spec->sys.attr); } if (strcmp(spec->name, "write_bytecode") == 0) { diff --git a/Python/intrinsics.c b/Python/intrinsics.c index ff44ba0ee64fa9..8ea920e690cd0d 100644 --- a/Python/intrinsics.c +++ b/Python/intrinsics.c @@ -9,7 +9,6 @@ #include "pycore_intrinsics.h" // INTRINSIC_PRINT #include "pycore_pyerrors.h" // _PyErr_SetString() #include "pycore_runtime.h" // _Py_ID() -#include "pycore_sysmodule.h" // _PySys_GetRequiredAttr() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_typevarobject.h" // _Py_make_typevar() #include "pycore_unicodeobject.h" // _PyUnicode_FromASCII() @@ -27,7 +26,7 @@ no_intrinsic1(PyThreadState* tstate, PyObject *unused) static PyObject * print_expr(PyThreadState* Py_UNUSED(ignored), PyObject *value) { - PyObject *hook = _PySys_GetRequiredAttr(&_Py_ID(displayhook)); + PyObject *hook = PySys_GetAttr(&_Py_ID(displayhook)); if (hook == NULL) { return NULL; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8394245d373030..724fda63511282 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1283,7 +1283,7 @@ init_interp_main(PyThreadState *tstate) if (is_main_interp) { /* Initialize warnings. */ PyObject *warnoptions; - if (_PySys_GetOptionalAttrString("warnoptions", &warnoptions) < 0) { + if (PySys_GetOptionalAttrString("warnoptions", &warnoptions) < 0) { return _PyStatus_ERR("can't initialize warnings"); } if (warnoptions != NULL && PyList_Check(warnoptions) && @@ -1806,7 +1806,7 @@ flush_std_files(void) PyObject *file; int status = 0; - if (_PySys_GetOptionalAttr(&_Py_ID(stdout), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stdout), &file) < 0) { status = -1; } else if (file != NULL && file != Py_None && !file_is_closed(file)) { @@ -1819,7 +1819,7 @@ flush_std_files(void) } Py_XDECREF(file); - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { PyErr_Clear(); status = -1; } @@ -3046,7 +3046,7 @@ _Py_FatalError_PrintExc(PyThreadState *tstate) } PyObject *ferr; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &ferr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &ferr) < 0) { _PyErr_Clear(tstate); } if (ferr == NULL || ferr == Py_None) { diff --git a/Python/pythonrun.c b/Python/pythonrun.c index f67b72aa91f671..8f1c78bf831863 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -114,7 +114,7 @@ _PyRun_InteractiveLoopObject(FILE *fp, PyObject *filename, PyCompilerFlags *flag } PyObject *v; - if (_PySys_GetOptionalAttr(&_Py_ID(ps1), &v) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps1), &v) < 0) { PyErr_Print(); return -1; } @@ -128,7 +128,7 @@ _PyRun_InteractiveLoopObject(FILE *fp, PyObject *filename, PyCompilerFlags *flag } } Py_XDECREF(v); - if (_PySys_GetOptionalAttr(&_Py_ID(ps2), &v) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps2), &v) < 0) { PyErr_Print(); return -1; } @@ -206,7 +206,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, PyObject *encoding_obj = NULL; const char *encoding = NULL; if (fp == stdin) { - if (_PySys_GetOptionalAttr(&_Py_ID(stdin), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stdin), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL && attr != Py_None) { @@ -226,7 +226,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, // Get sys.ps1 (as UTF-8) PyObject *ps1_obj = NULL; const char *ps1 = ""; - if (_PySys_GetOptionalAttr(&_Py_ID(ps1), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps1), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL) { @@ -247,7 +247,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, // Get sys.ps2 (as UTF-8) PyObject *ps2_obj = NULL; const char *ps2 = ""; - if (_PySys_GetOptionalAttr(&_Py_ID(ps2), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps2), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL) { @@ -658,7 +658,7 @@ _Py_HandleSystemExitAndKeyboardInterrupt(int *exitcode_p) } PyObject *sys_stderr; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &sys_stderr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &sys_stderr) < 0) { PyErr_Clear(); } else if (sys_stderr != NULL && sys_stderr != Py_None) { @@ -722,7 +722,7 @@ _PyErr_PrintEx(PyThreadState *tstate, int set_sys_last_vars) _PyErr_Clear(tstate); } } - if (_PySys_GetOptionalAttr(&_Py_ID(excepthook), &hook) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(excepthook), &hook) < 0) { PyErr_Clear(); } if (_PySys_Audit(tstate, "sys.excepthook", "OOOO", hook ? hook : Py_None, @@ -1197,7 +1197,7 @@ void PyErr_Display(PyObject *unused, PyObject *value, PyObject *tb) { PyObject *file; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { PyObject *exc = PyErr_GetRaisedException(); _PyObject_Dump(value); fprintf(stderr, "lost sys.stderr\n"); @@ -1321,7 +1321,7 @@ static void flush_io_stream(PyThreadState *tstate, PyObject *name) { PyObject *f; - if (_PySys_GetOptionalAttr(name, &f) < 0) { + if (PySys_GetOptionalAttr(name, &f) < 0) { PyErr_Clear(); } if (f != NULL) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 4ed045e3297bbc..e5ae841d195d4f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -76,12 +76,12 @@ module sys PyObject * -_PySys_GetRequiredAttr(PyObject *name) +PySys_GetAttr(PyObject *name) { if (!PyUnicode_Check(name)) { PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - Py_TYPE(name)->tp_name); + "attribute name must be string, not '%T'", + name); return NULL; } PyThreadState *tstate = _PyThreadState_GET(); @@ -98,7 +98,7 @@ _PySys_GetRequiredAttr(PyObject *name) } PyObject * -_PySys_GetRequiredAttrString(const char *name) +PySys_GetAttrString(const char *name) { PyThreadState *tstate = _PyThreadState_GET(); PyObject *sysdict = tstate->interp->sysdict; @@ -114,12 +114,12 @@ _PySys_GetRequiredAttrString(const char *name) } int -_PySys_GetOptionalAttr(PyObject *name, PyObject **value) +PySys_GetOptionalAttr(PyObject *name, PyObject **value) { if (!PyUnicode_Check(name)) { PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - Py_TYPE(name)->tp_name); + "attribute name must be string, not '%T'", + name); *value = NULL; return -1; } @@ -133,7 +133,7 @@ _PySys_GetOptionalAttr(PyObject *name, PyObject **value) } int -_PySys_GetOptionalAttrString(const char *name, PyObject **value) +PySys_GetOptionalAttrString(const char *name, PyObject **value) { PyThreadState *tstate = _PyThreadState_GET(); PyObject *sysdict = tstate->interp->sysdict; @@ -773,7 +773,7 @@ sys_displayhook(PyObject *module, PyObject *o) } if (PyObject_SetAttr(builtins, _Py_LATIN1_CHR('_'), Py_None) != 0) return NULL; - outf = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + outf = PySys_GetAttr(&_Py_ID(stdout)); if (outf == NULL) { return NULL; } @@ -3005,7 +3005,7 @@ static PyObject * get_warnoptions(PyThreadState *tstate) { PyObject *warnoptions; - if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { return NULL; } if (warnoptions == NULL || !PyList_Check(warnoptions)) { @@ -3042,7 +3042,7 @@ PySys_ResetWarnOptions(void) } PyObject *warnoptions; - if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { PyErr_Clear(); return; } @@ -3106,7 +3106,7 @@ PyAPI_FUNC(int) PySys_HasWarnOptions(void) { PyObject *warnoptions; - if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) { PyErr_Clear(); return 0; } @@ -3120,7 +3120,7 @@ static PyObject * get_xoptions(PyThreadState *tstate) { PyObject *xoptions; - if (_PySys_GetOptionalAttr(&_Py_ID(_xoptions), &xoptions) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(_xoptions), &xoptions) < 0) { return NULL; } if (xoptions == NULL || !PyDict_Check(xoptions)) { @@ -3373,7 +3373,7 @@ sys_set_flag(PyObject *flags, Py_ssize_t pos, PyObject *value) int _PySys_SetFlagObj(Py_ssize_t pos, PyObject *value) { - PyObject *flags = _PySys_GetRequiredAttrString("flags"); + PyObject *flags = PySys_GetAttrString("flags"); if (flags == NULL) { return -1; } @@ -3935,7 +3935,7 @@ _PySys_UpdateConfig(PyThreadState *tstate) #undef COPY_WSTR // sys.flags - PyObject *flags = _PySys_GetRequiredAttrString("flags"); + PyObject *flags = PySys_GetAttrString("flags"); if (flags == NULL) { return -1; } @@ -4251,7 +4251,7 @@ PySys_SetArgvEx(int argc, wchar_t **argv, int updatepath) } PyObject *sys_path; - if (_PySys_GetOptionalAttr(&_Py_ID(path), &sys_path) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(path), &sys_path) < 0) { Py_FatalError("can't get sys.path"); } else if (sys_path != NULL) { @@ -4347,7 +4347,7 @@ sys_write(PyObject *key, FILE *fp, const char *format, va_list va) PyObject *exc = _PyErr_GetRaisedException(tstate); written = PyOS_vsnprintf(buffer, sizeof(buffer), format, va); - file = _PySys_GetRequiredAttr(key); + file = PySys_GetAttr(key); if (sys_pyfile_write(buffer, file) != 0) { _PyErr_Clear(tstate); fputs(buffer, fp); @@ -4391,7 +4391,7 @@ sys_format(PyObject *key, FILE *fp, const char *format, va_list va) PyObject *exc = _PyErr_GetRaisedException(tstate); message = PyUnicode_FromFormatV(format, va); if (message != NULL) { - file = _PySys_GetRequiredAttr(key); + file = PySys_GetAttr(key); if (sys_pyfile_write_unicode(message, file) != 0) { _PyErr_Clear(tstate); utf8 = PyUnicode_AsUTF8(message); diff --git a/Python/traceback.c b/Python/traceback.c index c06cb1a59089e2..4f674eaf55715b 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -9,7 +9,6 @@ #include "pycore_interpframe.h" // _PyFrame_GetCode() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_traceback.h" // EXCEPTION_TB_HEADER #include "frameobject.h" // PyFrame_New() @@ -399,7 +398,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * taillen = strlen(tail); PyThreadState *tstate = _PyThreadState_GET(); - if (_PySys_GetOptionalAttr(&_Py_ID(path), &syspath) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(path), &syspath) < 0) { PyErr_Clear(); goto error; } @@ -777,7 +776,7 @@ _PyTraceBack_Print(PyObject *v, const char *header, PyObject *f) PyErr_BadInternalCall(); return -1; } - if (_PySys_GetOptionalAttrString("tracebacklimit", &limitv) < 0) { + if (PySys_GetOptionalAttrString("tracebacklimit", &limitv) < 0) { return -1; } else if (limitv != NULL && PyLong_Check(limitv)) { From f6324bc7eedc615c3c961fe368a8c56697d42936 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 28 May 2025 20:14:56 +0300 Subject: [PATCH 398/989] gh-108885: Imporove tests for doctest (GH-134832) Test the error and failure report in more detail. --- .../test_doctest/sample_doctest_errors.py | 46 ++ Lib/test/test_doctest/test_doctest.py | 485 +++++++++++++++++- Lib/test/test_doctest/test_doctest_errors.txt | 14 + Lib/test/test_doctest/test_doctest_skip.txt | 2 + Lib/test/test_doctest/test_doctest_skip2.txt | 6 + 5 files changed, 545 insertions(+), 8 deletions(-) create mode 100644 Lib/test/test_doctest/sample_doctest_errors.py create mode 100644 Lib/test/test_doctest/test_doctest_errors.txt create mode 100644 Lib/test/test_doctest/test_doctest_skip2.txt diff --git a/Lib/test/test_doctest/sample_doctest_errors.py b/Lib/test/test_doctest/sample_doctest_errors.py new file mode 100644 index 00000000000000..4a6f07af2d4e5a --- /dev/null +++ b/Lib/test/test_doctest/sample_doctest_errors.py @@ -0,0 +1,46 @@ +"""This is a sample module used for testing doctest. + +This module includes various scenarios involving errors. + +>>> 2 + 2 +5 +>>> 1/0 +1 +""" + +def g(): + [][0] # line 12 + +def errors(): + """ + >>> 2 + 2 + 5 + >>> 1/0 + 1 + >>> def f(): + ... 2 + '2' + ... + >>> f() + 1 + >>> g() + 1 + """ + +def syntax_error(): + """ + >>> 2+*3 + 5 + """ + +__test__ = { + 'bad': """ + >>> 2 + 2 + 5 + >>> 1/0 + 1 + """, +} + +def test_suite(): + import doctest + return doctest.DocTestSuite() diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index a4a49298bab3be..c5b247797c321d 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -2267,13 +2267,21 @@ def test_DocTestSuite(): >>> import unittest >>> import test.test_doctest.sample_doctest >>> suite = doctest.DocTestSuite(test.test_doctest.sample_doctest) - >>> suite.run(unittest.TestResult()) + >>> result = suite.run(unittest.TestResult()) + >>> result + >>> for tst, _ in result.failures: + ... print(tst) + bad (test.test_doctest.sample_doctest.__test__) + foo (test.test_doctest.sample_doctest) + test_silly_setup (test.test_doctest.sample_doctest) + y_is_one (test.test_doctest.sample_doctest) We can also supply the module by name: >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest') - >>> suite.run(unittest.TestResult()) + >>> result = suite.run(unittest.TestResult()) + >>> result The module need not contain any doctest examples: @@ -2297,6 +2305,14 @@ def test_DocTestSuite(): >>> len(result.skipped) 2 + >>> for tst, _ in result.skipped: + ... print(tst) + double_skip (test.test_doctest.sample_doctest_skip) + single_skip (test.test_doctest.sample_doctest_skip) + >>> for tst, _ in result.failures: + ... print(tst) + no_skip_fail (test.test_doctest.sample_doctest_skip) + partial_skip_fail (test.test_doctest.sample_doctest_skip) We can use the current module: @@ -2383,7 +2399,174 @@ def test_DocTestSuite(): modified the test globals, which are a copy of the sample_doctest module dictionary. The test globals are automatically cleared for us after a test. - """ + """ + +def test_DocTestSuite_errors(): + """Tests for error reporting in DocTestSuite. + + >>> import unittest + >>> import test.test_doctest.sample_doctest_errors as mod + >>> suite = doctest.DocTestSuite(mod) + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File ... + raise self.failureException(self.format_failure(new.getvalue())) + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors + File "...sample_doctest_errors.py", line 0, in sample_doctest_errors + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + + >>> print(result.failures[1][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File ... + raise self.failureException(self.format_failure(new.getvalue())) + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.__test__.bad + File "...sample_doctest_errors.py", line unknown line number, in bad + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + + >>> print(result.failures[2][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File ... + raise self.failureException(self.format_failure(new.getvalue())) + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.errors + File "...sample_doctest_errors.py", line 14, in errors + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors + Failed example: + g() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + g() + ~^^ + File "...sample_doctest_errors.py", line 12, in g + [][0] # line 12 + ~~^^^ + IndexError: list index out of range + + + >>> print(result.failures[3][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File ... + raise self.failureException(self.format_failure(new.getvalue())) + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.syntax_error + File "...sample_doctest_errors.py", line 29, in syntax_error + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error + Failed example: + 2+*3 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^ + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + + + """ def test_DocFileSuite(): """We can test tests found in text files using a DocFileSuite. @@ -2455,12 +2638,16 @@ def test_DocFileSuite(): >>> suite = doctest.DocFileSuite('test_doctest.txt', ... 'test_doctest4.txt', - ... 'test_doctest_skip.txt') + ... 'test_doctest_skip.txt', + ... 'test_doctest_skip2.txt') >>> result = suite.run(unittest.TestResult()) >>> result - - >>> len(result.skipped) - 1 + + >>> len(result.skipped) + 1 + >>> for tst, _ in result.skipped: # doctest: +ELLIPSIS + ... print('=', tst) + = ...test_doctest_skip.txt You can specify initial global variables: @@ -2542,8 +2729,82 @@ def test_DocFileSuite(): ... encoding='utf-8') >>> suite.run(unittest.TestResult()) + """ - """ +def test_DocFileSuite_errors(): + """Tests for error reporting in DocTestSuite. + + >>> import unittest + >>> suite = doctest.DocFileSuite('test_doctest_errors.txt') + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File ... + raise self.failureException(self.format_failure(new.getvalue())) + AssertionError: Failed doctest test for test_doctest_errors.txt + File "...test_doctest_errors.txt", line 0 + + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt + Failed example: + 2+*3 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^ + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + + + """ def test_trailing_space_in_test(): """ @@ -2612,6 +2873,8 @@ def test_unittest_reportflags(): ... optionflags=doctest.DONT_ACCEPT_BLANKLINE) >>> import unittest >>> result = suite.run(unittest.TestResult()) + >>> result + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS Traceback ... Failed example: @@ -2629,6 +2892,8 @@ def test_unittest_reportflags(): Now, when we run the test: >>> result = suite.run(unittest.TestResult()) + >>> result + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS Traceback ... Failed example: @@ -2650,6 +2915,8 @@ def test_unittest_reportflags(): Then the default eporting options are ignored: >>> result = suite.run(unittest.TestResult()) + >>> result + *NOTE*: These doctest are intentionally not placed in raw string to depict the trailing whitespace using `\x20` in the diff below. @@ -2860,6 +3127,73 @@ def test_testfile(): r""" >>> _colorize.COLORIZE = save_colorize """ +def test_testfile_errors(): r""" +Tests for error reporting in the testfile() function. + + >>> doctest.testfile('test_doctest_errors.txt', verbose=False) # doctest: +ELLIPSIS + ********************************************************************** + File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ********************************************************************** + File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt + Failed example: + 2+*3 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^ + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + ********************************************************************** + 1 item had failures: + 4 of 5 in test_doctest_errors.txt + ***Test Failed*** 4 failures. + TestResults(failed=4, attempted=5) +""" + class TestImporter(importlib.abc.MetaPathFinder): def find_spec(self, fullname, path, target=None): @@ -2990,6 +3324,141 @@ def test_testmod(): r""" TestResults(failed=0, attempted=0) """ +def test_testmod_errors(): r""" +Tests for error reporting in the testmod() function. + + >>> import test.test_doctest.sample_doctest_errors as mod + >>> doctest.testmod(mod, verbose=False) # doctest: +ELLIPSIS + ********************************************************************** + File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ********************************************************************** + File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors + Failed example: + g() + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "", line 1, in + g() + ~^^ + File "...sample_doctest_errors.py", line 12, in g + [][0] # line 12 + ~~^^^ + IndexError: list index out of range + ********************************************************************** + File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error + Failed example: + 2+*3 + Exception raised: + Traceback (most recent call last): + File ... + exec(compile(example.source, filename, "single", + ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + compileflags, True), test.globs) + ^^^^^^^^^^^^^^^^^^^ + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + ********************************************************************** + 4 items had failures: + 2 of 2 in test.test_doctest.sample_doctest_errors + 2 of 2 in test.test_doctest.sample_doctest_errors.__test__.bad + 4 of 5 in test.test_doctest.sample_doctest_errors.errors + 1 of 1 in test.test_doctest.sample_doctest_errors.syntax_error + ***Test Failed*** 9 failures. + TestResults(failed=9, attempted=10) +""" + try: os.fsencode("foo-bär@baz.py") supports_unicode = True diff --git a/Lib/test/test_doctest/test_doctest_errors.txt b/Lib/test/test_doctest/test_doctest_errors.txt new file mode 100644 index 00000000000000..93c3c106e60b32 --- /dev/null +++ b/Lib/test/test_doctest/test_doctest_errors.txt @@ -0,0 +1,14 @@ +This is a sample doctest in a text file, in which all examples fail +or raise an exception. + + >>> 2 + 2 + 5 + >>> 1/0 + 1 + >>> def f(): + ... 2 + '2' + ... + >>> f() + 1 + >>> 2+*3 + 5 diff --git a/Lib/test/test_doctest/test_doctest_skip.txt b/Lib/test/test_doctest/test_doctest_skip.txt index f340e2b8141253..06c23d06e606a3 100644 --- a/Lib/test/test_doctest/test_doctest_skip.txt +++ b/Lib/test/test_doctest/test_doctest_skip.txt @@ -2,3 +2,5 @@ This is a sample doctest in a text file, in which all examples are skipped. >>> 2 + 2 # doctest: +SKIP 5 + >>> 2 + 2 # doctest: +SKIP + 4 diff --git a/Lib/test/test_doctest/test_doctest_skip2.txt b/Lib/test/test_doctest/test_doctest_skip2.txt new file mode 100644 index 00000000000000..85e4938c346a09 --- /dev/null +++ b/Lib/test/test_doctest/test_doctest_skip2.txt @@ -0,0 +1,6 @@ +This is a sample doctest in a text file, in which some examples are skipped. + + >>> 2 + 2 # doctest: +SKIP + 5 + >>> 2 + 2 + 4 From 9fbd66a93d526c49fac8e1427c25e8f7f4154e29 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 28 May 2025 19:03:41 +0100 Subject: [PATCH 399/989] GH-133912: Fix `PyObject_GenericSetDict` to handle inline values (GH-134725) --- Include/internal/pycore_object.h | 2 + Lib/test/test_capi/test_type.py | 10 ++++ ...-05-26-15-55-50.gh-issue-133912.-xAguL.rst | 2 + Modules/_testcapimodule.c | 49 +++++++++++++++++ Objects/object.c | 31 +---------- Objects/typeobject.c | 54 +++++++++++-------- 6 files changed, 95 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-26-15-55-50.gh-issue-133912.-xAguL.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 3fc1e7d7dcbd15..50225623fe52db 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -1010,6 +1010,8 @@ enum _PyAnnotateFormat { _Py_ANNOTATE_FORMAT_STRING = 4, }; +int _PyObject_SetDict(PyObject *obj, PyObject *value); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_type.py b/Lib/test/test_capi/test_type.py index 3c9974c7387388..15fb4a93e2ad74 100644 --- a/Lib/test/test_capi/test_type.py +++ b/Lib/test/test_capi/test_type.py @@ -264,3 +264,13 @@ def test_manual_heap_type(self): ManualHeapType = _testcapi.ManualHeapType for i in range(100): self.assertIsInstance(ManualHeapType(), ManualHeapType) + + def test_extension_managed_dict_type(self): + ManagedDictType = _testcapi.ManagedDictType + obj = ManagedDictType() + obj.foo = 42 + self.assertEqual(obj.foo, 42) + self.assertEqual(obj.__dict__, {'foo': 42}) + obj.__dict__ = {'bar': 3} + self.assertEqual(obj.__dict__, {'bar': 3}) + self.assertEqual(obj.bar, 3) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-26-15-55-50.gh-issue-133912.-xAguL.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-26-15-55-50.gh-issue-133912.-xAguL.rst new file mode 100644 index 00000000000000..2118f3d0c350ec --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-26-15-55-50.gh-issue-133912.-xAguL.rst @@ -0,0 +1,2 @@ +Fix the C API function ``PyObject_GenericSetDict`` to handle extension +classes with inline values. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3aa6e4c9e43a26..281c5b41137ac2 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3175,6 +3175,48 @@ create_manual_heap_type(void) return (PyObject *)type; } +typedef struct { + PyObject_VAR_HEAD +} ManagedDictObject; + +int ManagedDict_traverse(PyObject *self, visitproc visit, void *arg) { + PyObject_VisitManagedDict(self, visit, arg); + Py_VISIT(Py_TYPE(self)); + return 0; +} + +int ManagedDict_clear(PyObject *self) { + PyObject_ClearManagedDict(self); + return 0; +} + +static PyGetSetDef ManagedDict_getset[] = { + {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, NULL, NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + +static PyType_Slot ManagedDict_slots[] = { + {Py_tp_new, (void *)PyType_GenericNew}, + {Py_tp_getset, (void *)ManagedDict_getset}, + {Py_tp_traverse, (void *)ManagedDict_traverse}, + {Py_tp_clear, (void *)ManagedDict_clear}, + {0} +}; + +static PyType_Spec ManagedDict_spec = { + "_testcapi.ManagedDictType", + sizeof(ManagedDictObject), + 0, // itemsize + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_MANAGED_DICT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC, + ManagedDict_slots +}; + +static PyObject * +create_managed_dict_type(void) +{ + return PyType_FromSpec(&ManagedDict_spec); +} + static struct PyModuleDef _testcapimodule = { PyModuleDef_HEAD_INIT, .m_name = "_testcapi", @@ -3315,6 +3357,13 @@ PyInit__testcapi(void) return NULL; } + PyObject *managed_dict_type = create_managed_dict_type(); + if (managed_dict_type == NULL) { + return NULL; + } + if (PyModule_Add(m, "ManagedDictType", managed_dict_type) < 0) { + return NULL; + } /* Include tests from the _testcapi/ directory */ if (_PyTestCapi_Init_Vectorcall(m) < 0) { diff --git a/Objects/object.c b/Objects/object.c index 68c8bfeae33e33..9fe61ba7f1593a 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2016,40 +2016,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) int PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr = _PyObject_GetDictPtr(obj); - if (dictptr == NULL) { - if (_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_INLINE_VALUES) && - _PyObject_GetManagedDict(obj) == NULL - ) { - /* Was unable to convert to dict */ - PyErr_NoMemory(); - } - else { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - } - return -1; - } if (value == NULL) { PyErr_SetString(PyExc_TypeError, "cannot delete __dict__"); return -1; } - if (!PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - Py_BEGIN_CRITICAL_SECTION(obj); - PyObject *olddict = *dictptr; - FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); -#ifdef Py_GIL_DISABLED - _PyObject_XDecRefDelayed(olddict); -#else - Py_XDECREF(olddict); -#endif - Py_END_CRITICAL_SECTION(); - return 0; + return _PyObject_SetDict(obj, value); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 07f01e3bcb229c..db923c164774b7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3948,10 +3948,39 @@ subtype_dict(PyObject *obj, void *context) return PyObject_GenericGetDict(obj, context); } +int +_PyObject_SetDict(PyObject *obj, PyObject *value) +{ + if (value != NULL && !PyDict_Check(value)) { + PyErr_Format(PyExc_TypeError, + "__dict__ must be set to a dictionary, " + "not a '%.200s'", Py_TYPE(value)->tp_name); + return -1; + } + if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + return _PyObject_SetManagedDict(obj, value); + } + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr == NULL) { + PyErr_SetString(PyExc_AttributeError, + "This object has no __dict__"); + return -1; + } + Py_BEGIN_CRITICAL_SECTION(obj); + PyObject *olddict = *dictptr; + FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); +#ifdef Py_GIL_DISABLED + _PyObject_XDecRefDelayed(olddict); +#else + Py_XDECREF(olddict); +#endif + Py_END_CRITICAL_SECTION(); + return 0; +} + static int subtype_setdict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr; PyTypeObject *base; base = get_builtin_base_with_dict(Py_TYPE(obj)); @@ -3969,28 +3998,7 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) } return func(descr, obj, value); } - /* Almost like PyObject_GenericSetDict, but allow __dict__ to be deleted. */ - if (value != NULL && !PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - - if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - return _PyObject_SetManagedDict(obj, value); - } - else { - dictptr = _PyObject_ComputedDictPointer(obj); - if (dictptr == NULL) { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - return -1; - } - Py_CLEAR(*dictptr); - *dictptr = Py_XNewRef(value); - } - return 0; + return _PyObject_SetDict(obj, value); } static PyObject * From e496444fdfb2a64b4c7c2075fc53cc69f7c01677 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 28 May 2025 21:27:24 +0100 Subject: [PATCH 400/989] Add a tool to benchmark external inspection (#134810) --- .../benchmark_external_inspection.py | 459 ++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 Tools/inspection/benchmark_external_inspection.py diff --git a/Tools/inspection/benchmark_external_inspection.py b/Tools/inspection/benchmark_external_inspection.py new file mode 100644 index 00000000000000..62182194c1ab2a --- /dev/null +++ b/Tools/inspection/benchmark_external_inspection.py @@ -0,0 +1,459 @@ +import _remote_debugging +import time +import subprocess +import sys +import contextlib +import tempfile +import os +import argparse +from _colorize import get_colors, can_colorize + +CODE = '''\ +import time +import os +import sys +import math + +def slow_fibonacci(n): + """Intentionally slow recursive fibonacci - should show up prominently in profiler""" + if n <= 1: + return n + return slow_fibonacci(n-1) + slow_fibonacci(n-2) + +def medium_computation(): + """Medium complexity function""" + result = 0 + for i in range(1000): + result += math.sqrt(i) * math.sin(i) + return result + +def fast_loop(): + """Fast simple loop""" + total = 0 + for i in range(100): + total += i + return total + +def string_operations(): + """String manipulation that should be visible in profiler""" + text = "hello world " * 100 + words = text.split() + return " ".join(reversed(words)) + +def nested_calls(): + """Nested function calls to test call stack depth""" + def level1(): + def level2(): + def level3(): + return medium_computation() + return level3() + return level2() + return level1() + +def main_loop(): + """Main computation loop with different execution paths""" + iteration = 0 + + while True: + iteration += 1 + + # Different execution paths with different frequencies + if iteration % 50 == 0: + # Expensive operation - should show high per-call time + result = slow_fibonacci(20) + + elif iteration % 10 == 0: + # Medium operation + result = nested_calls() + + elif iteration % 5 == 0: + # String operations + result = string_operations() + + else: + # Fast operation - most common + result = fast_loop() + + # Small delay to make sampling more interesting + time.sleep(0.001) + +if __name__ == "__main__": + main_loop() +''' + +DEEP_STATIC_CODE = """\ +import time +def factorial(n): + if n <= 1: + time.sleep(10000) + return 1 + return n * factorial(n-1) + +factorial(900) +""" + +CODE_WITH_TONS_OF_THREADS = '''\ +import time +import threading +import random +import math + +def cpu_intensive_work(): + """Do some CPU intensive calculations""" + result = 0 + for _ in range(10000): + result += math.sin(random.random()) * math.cos(random.random()) + return result + +def io_intensive_work(): + """Simulate IO intensive work with sleeps""" + time.sleep(0.1) + +def mixed_workload(): + """Mix of CPU and IO work""" + while True: + if random.random() < 0.3: + cpu_intensive_work() + else: + io_intensive_work() + +def create_threads(n): + """Create n threads doing mixed workloads""" + threads = [] + for _ in range(n): + t = threading.Thread(target=mixed_workload, daemon=True) + t.start() + threads.append(t) + return threads + +# Start with 5 threads +active_threads = create_threads(5) +thread_count = 5 + +# Main thread manages threads and does work +while True: + # Randomly add or remove threads + if random.random() < 0.1: # 10% chance each iteration + if random.random() < 0.5 and thread_count < 100: + # Add 1-5 new threads + new_count = random.randint(1, 5) + new_threads = create_threads(new_count) + active_threads.extend(new_threads) + thread_count += new_count + elif thread_count > 10: + # Remove 1-3 threads + remove_count = random.randint(1, 5) + # The threads will terminate naturally since they're daemons + active_threads = active_threads[remove_count:] + thread_count -= remove_count + + cpu_intensive_work() + time.sleep(0.05) +''' + +CODE_EXAMPLES = { + "basic": { + "code": CODE, + "description": "Mixed workload with fibonacci, computations, and string operations", + }, + "deep_static": { + "code": DEEP_STATIC_CODE, + "description": "Deep recursive call stack with 900+ frames (factorial)", + }, + "threads": { + "code": CODE_WITH_TONS_OF_THREADS, + "description": "Tons of threads doing mixed CPU/IO work", + }, +} + + +def benchmark(unwinder, duration_seconds=10): + """Benchmark mode - measure raw sampling speed for specified duration""" + sample_count = 0 + fail_count = 0 + total_work_time = 0.0 + start_time = time.perf_counter() + end_time = start_time + duration_seconds + + colors = get_colors(can_colorize()) + + print( + f"{colors.BOLD_BLUE}Benchmarking sampling speed for {duration_seconds} seconds...{colors.RESET}" + ) + + try: + while time.perf_counter() < end_time: + work_start = time.perf_counter() + try: + stack_trace = unwinder.get_stack_trace() + if stack_trace: + sample_count += 1 + except (OSError, RuntimeError, UnicodeDecodeError) as e: + fail_count += 1 + + work_end = time.perf_counter() + total_work_time += work_end - work_start + + total_attempts = sample_count + fail_count + if total_attempts % 10000 == 0: + avg_work_time_us = (total_work_time / total_attempts) * 1e6 + work_rate = ( + total_attempts / total_work_time if total_work_time > 0 else 0 + ) + success_rate = (sample_count / total_attempts) * 100 + + # Color code the success rate + if success_rate >= 95: + success_color = colors.GREEN + elif success_rate >= 80: + success_color = colors.YELLOW + else: + success_color = colors.RED + + print( + f"{colors.CYAN}Attempts:{colors.RESET} {total_attempts} | " + f"{colors.CYAN}Success:{colors.RESET} {success_color}{success_rate:.1f}%{colors.RESET} | " + f"{colors.CYAN}Rate:{colors.RESET} {colors.MAGENTA}{work_rate:.1f}Hz{colors.RESET} | " + f"{colors.CYAN}Avg:{colors.RESET} {colors.YELLOW}{avg_work_time_us:.2f}µs{colors.RESET}" + ) + except KeyboardInterrupt: + print(f"\n{colors.YELLOW}Benchmark interrupted by user{colors.RESET}") + + actual_end_time = time.perf_counter() + wall_time = actual_end_time - start_time + total_attempts = sample_count + fail_count + + # Return final statistics + return { + "wall_time": wall_time, + "total_attempts": total_attempts, + "sample_count": sample_count, + "fail_count": fail_count, + "success_rate": ( + (sample_count / total_attempts) * 100 if total_attempts > 0 else 0 + ), + "total_work_time": total_work_time, + "avg_work_time_us": ( + (total_work_time / total_attempts) * 1e6 if total_attempts > 0 else 0 + ), + "work_rate_hz": total_attempts / total_work_time if total_work_time > 0 else 0, + "samples_per_sec": sample_count / wall_time if wall_time > 0 else 0, + } + + +def print_benchmark_results(results): + """Print comprehensive benchmark results""" + colors = get_colors(can_colorize()) + + print(f"\n{colors.BOLD_GREEN}{'='*60}{colors.RESET}") + print(f"{colors.BOLD_GREEN}get_stack_trace() Benchmark Results{colors.RESET}") + print(f"{colors.BOLD_GREEN}{'='*60}{colors.RESET}") + + # Basic statistics + print(f"\n{colors.BOLD_CYAN}Basic Statistics:{colors.RESET}") + print( + f" {colors.CYAN}Wall time:{colors.RESET} {colors.YELLOW}{results['wall_time']:.3f}{colors.RESET} seconds" + ) + print( + f" {colors.CYAN}Total attempts:{colors.RESET} {colors.MAGENTA}{results['total_attempts']:,}{colors.RESET}" + ) + print( + f" {colors.CYAN}Successful samples:{colors.RESET} {colors.GREEN}{results['sample_count']:,}{colors.RESET}" + ) + print( + f" {colors.CYAN}Failed samples:{colors.RESET} {colors.RED}{results['fail_count']:,}{colors.RESET}" + ) + + # Color code the success rate + success_rate = results["success_rate"] + if success_rate >= 95: + success_color = colors.BOLD_GREEN + elif success_rate >= 80: + success_color = colors.BOLD_YELLOW + else: + success_color = colors.BOLD_RED + + print( + f" {colors.CYAN}Success rate:{colors.RESET} {success_color}{success_rate:.2f}%{colors.RESET}" + ) + + # Performance metrics + print(f"\n{colors.BOLD_CYAN}Performance Metrics:{colors.RESET}") + print( + f" {colors.CYAN}Average call time:{colors.RESET} {colors.YELLOW}{results['avg_work_time_us']:.2f}{colors.RESET} µs" + ) + print( + f" {colors.CYAN}Work rate:{colors.RESET} {colors.MAGENTA}{results['work_rate_hz']:.1f}{colors.RESET} calls/sec" + ) + print( + f" {colors.CYAN}Sample rate:{colors.RESET} {colors.MAGENTA}{results['samples_per_sec']:.1f}{colors.RESET} samples/sec" + ) + print( + f" {colors.CYAN}Total work time:{colors.RESET} {colors.YELLOW}{results['total_work_time']:.3f}{colors.RESET} seconds" + ) + + # Color code work efficiency + efficiency = (results["total_work_time"] / results["wall_time"]) * 100 + if efficiency >= 80: + efficiency_color = colors.GREEN + elif efficiency >= 50: + efficiency_color = colors.YELLOW + else: + efficiency_color = colors.RED + + print( + f" {colors.CYAN}Work efficiency:{colors.RESET} {efficiency_color}{efficiency:.1f}%{colors.RESET}" + ) + + +def parse_arguments(): + """Parse command line arguments""" + # Build the code examples description + examples_desc = "\n".join( + [f" {name}: {info['description']}" for name, info in CODE_EXAMPLES.items()] + ) + + parser = argparse.ArgumentParser( + description="Benchmark get_stack_trace() performance", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=f""" +Examples: + %(prog)s # Run basic benchmark for 10 seconds (default) + %(prog)s --duration 30 # Run basic benchmark for 30 seconds + %(prog)s -d 60 # Run basic benchmark for 60 seconds + %(prog)s --code deep_static # Run deep static call stack benchmark + %(prog)s --code deep_static -d 30 # Run deep static benchmark for 30 seconds + +Available code examples: +{examples_desc} + """, + color=True, + ) + + parser.add_argument( + "--duration", + "-d", + type=int, + default=10, + help="Benchmark duration in seconds (default: 10)", + ) + + parser.add_argument( + "--code", + "-c", + choices=list(CODE_EXAMPLES.keys()), + default="basic", + help="Code example to benchmark (default: basic)", + ) + + return parser.parse_args() + + +def create_target_process(temp_file, code_example="basic"): + """Create and start the target process for benchmarking""" + example_info = CODE_EXAMPLES.get(code_example, {"code": CODE}) + selected_code = example_info["code"] + temp_file.write(selected_code) + temp_file.flush() + + process = subprocess.Popen( + [sys.executable, temp_file.name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + + # Give it time to start + time.sleep(1.0) + + # Check if it's still running + if process.poll() is not None: + stdout, stderr = process.communicate() + raise RuntimeError( + f"Target process exited unexpectedly:\nSTDOUT: {stdout.decode()}\nSTDERR: {stderr.decode()}" + ) + + return process, temp_file.name + + +def cleanup_process(process, temp_file_path): + """Clean up the target process and temporary file""" + with contextlib.suppress(Exception): + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=5.0) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def main(): + """Main benchmark function""" + colors = get_colors(can_colorize()) + args = parse_arguments() + + print(f"{colors.BOLD_MAGENTA}External Inspection Benchmark Tool{colors.RESET}") + print(f"{colors.BOLD_MAGENTA}{'=' * 34}{colors.RESET}") + + example_info = CODE_EXAMPLES.get(args.code, {"description": "Unknown"}) + print( + f"\n{colors.CYAN}Code Example:{colors.RESET} {colors.GREEN}{args.code}{colors.RESET}" + ) + print(f"{colors.CYAN}Description:{colors.RESET} {example_info['description']}") + print( + f"{colors.CYAN}Benchmark Duration:{colors.RESET} {colors.YELLOW}{args.duration}{colors.RESET} seconds" + ) + + process = None + temp_file_path = None + + try: + # Create target process + print(f"\n{colors.BLUE}Creating and starting target process...{colors.RESET}") + with tempfile.NamedTemporaryFile(mode="w", suffix=".py") as temp_file: + process, temp_file_path = create_target_process(temp_file, args.code) + print( + f"{colors.GREEN}Target process started with PID: {colors.BOLD_WHITE}{process.pid}{colors.RESET}" + ) + + # Run benchmark with specified duration + with process: + # Create unwinder and run benchmark + print(f"{colors.BLUE}Initializing unwinder...{colors.RESET}") + try: + unwinder = _remote_debugging.RemoteUnwinder( + process.pid, all_threads=True + ) + results = benchmark(unwinder, duration_seconds=args.duration) + finally: + cleanup_process(process, temp_file_path) + + # Print results + print_benchmark_results(results) + + except PermissionError as e: + print( + f"{colors.BOLD_RED}Error: Insufficient permissions to read stack trace: {e}{colors.RESET}" + ) + print( + f"{colors.YELLOW}Try running with appropriate privileges (e.g., sudo){colors.RESET}" + ) + return 1 + except Exception as e: + print(f"{colors.BOLD_RED}Error during benchmarking: {e}{colors.RESET}") + if process: + with contextlib.suppress(Exception): + stdout, stderr = process.communicate(timeout=1) + if stdout: + print( + f"{colors.CYAN}Process STDOUT:{colors.RESET} {stdout.decode()}" + ) + if stderr: + print( + f"{colors.RED}Process STDERR:{colors.RESET} {stderr.decode()}" + ) + raise + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From e9d845b41dca9ad84b76ef777d05e647a4b4d8cd Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 28 May 2025 14:18:34 -0700 Subject: [PATCH 401/989] gh-134262: Add retries to downloads in PCbuild\get_external.py (GH-134820) --- PCbuild/get_external.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index 4ecc8925349c93..99aff63882f5ba 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -9,6 +9,25 @@ from urllib.request import urlretrieve +def retrieve_with_retries(download_location, output_path, reporthook, + max_retries=7): + """Download a file with exponential backoff retry and save to disk.""" + for attempt in range(max_retries): + try: + resp = urlretrieve( + download_location, + output_path, + reporthook=reporthook, + ) + except ConnectionError as ex: + if attempt == max_retries: + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(2.25**attempt) + else: + return resp + + def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): repo = f'cpython-{"bin" if binary else "source"}-deps' url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip' @@ -16,10 +35,10 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): if verbose: reporthook = print zip_dir.mkdir(parents=True, exist_ok=True) - filename, headers = urlretrieve( + filename, _headers = retrieve_with_retries( url, zip_dir / f'{commit_hash}.zip', - reporthook=reporthook, + reporthook ) return filename From e64395e8eb8d3a9e35e3e534e87d427ff27ab0a5 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 28 May 2025 16:15:39 -0700 Subject: [PATCH 402/989] gh-134262: Fix off by one errors in download retry functions (GH-134867) --- PCbuild/get_external.py | 2 +- Tools/build/generate_sbom.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index 99aff63882f5ba..8c1155c74a642c 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -12,7 +12,7 @@ def retrieve_with_retries(download_location, output_path, reporthook, max_retries=7): """Download a file with exponential backoff retry and save to disk.""" - for attempt in range(max_retries): + for attempt in range(max_retries + 1): try: resp = urlretrieve( download_location, diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index ecb7b54f6d8a13..df52f8de762a01 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -169,7 +169,7 @@ def download_with_retries(download_location: str, base_delay: float = 2.25, max_jitter: float = 1.0) -> typing.Any: """Download a file with exponential backoff retry.""" - for attempt in range(max_retries): + for attempt in range(max_retries + 1): try: resp = urllib.request.urlopen(download_location) except urllib.error.URLError as ex: From b783e1791b2076ef560104f8207156d75859c0a6 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 29 May 2025 14:16:20 +0100 Subject: [PATCH 403/989] gh-69011: : clarify & deduplicate `ctypes.create_*_buffer` docs (GH-132858) This adds a warning about the possibly-missing NUL terminator, but in a way that doesn't make it sound like a bug/wart. --- Doc/library/ctypes.rst | 52 +++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 5b733d5321e907..8e74c6c9dee744 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -2031,35 +2031,55 @@ Utility functions pointer. -.. function:: create_string_buffer(init_or_size, size=None) +.. function:: create_string_buffer(init, size=None) + create_string_buffer(size) This function creates a mutable character buffer. The returned object is a ctypes array of :class:`c_char`. - *init_or_size* must be an integer which specifies the size of the array, or a - bytes object which will be used to initialize the array items. + If *size* is given (and not ``None``), it must be an :class:`int`. + It specifies the size of the returned array. - If a bytes object is specified as first argument, the buffer is made one item - larger than its length so that the last element in the array is a NUL - termination character. An integer can be passed as second argument which allows - specifying the size of the array if the length of the bytes should not be used. + If the *init* argument is given, it must be :class:`bytes`. It is used + to initialize the array items. Bytes not initialized this way are + set to zero (NUL). + + If *size* is not given (or if it is ``None``), the buffer is made one element + larger than *init*, effectively adding a NUL terminator. + + If both arguments are given, *size* must not be less than ``len(init)``. + + .. warning:: + + If *size* is equal to ``len(init)``, a NUL terminator is + not added. Do not treat such a buffer as a C string. + + For example:: + + >>> bytes(create_string_buffer(2)) + b'\x00\x00' + >>> bytes(create_string_buffer(b'ab')) + b'ab\x00' + >>> bytes(create_string_buffer(b'ab', 2)) + b'ab' + >>> bytes(create_string_buffer(b'ab', 4)) + b'ab\x00\x00' + >>> bytes(create_string_buffer(b'abcdef', 2)) + Traceback (most recent call last): + ... + ValueError: byte string too long .. audit-event:: ctypes.create_string_buffer init,size ctypes.create_string_buffer -.. function:: create_unicode_buffer(init_or_size, size=None) +.. function:: create_unicode_buffer(init, size=None) + create_unicode_buffer(size) This function creates a mutable unicode character buffer. The returned object is a ctypes array of :class:`c_wchar`. - *init_or_size* must be an integer which specifies the size of the array, or a - string which will be used to initialize the array items. - - If a string is specified as first argument, the buffer is made one item - larger than the length of the string so that the last element in the array is a - NUL termination character. An integer can be passed as second argument which - allows specifying the size of the array if the length of the string should not - be used. + The function takes the same arguments as :func:`~create_string_buffer` except + *init* must be a string and *size* counts :class:`c_wchar`. .. audit-event:: ctypes.create_unicode_buffer init,size ctypes.create_unicode_buffer From cafbcd666a7488e9f72553708350b7091dc913fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 29 May 2025 15:28:57 +0200 Subject: [PATCH 404/989] gh-133866: remove deprecated and undocumented function `ctypes.SetPointerType` (GH-133869) --- Doc/whatsnew/3.15.rst | 8 ++++++++ Lib/ctypes/__init__.py | 6 ------ Lib/test/test_ctypes/test_incomplete.py | 9 +++------ .../2025-05-11-10-01-48.gh-issue-133866.g3dHP_.rst | 3 +++ 4 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-11-10-01-48.gh-issue-133866.g3dHP_.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 87cca4eeff385a..ced9c63071a53c 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -138,6 +138,14 @@ Deprecated Removed ======= +ctypes +------ + +* Removed the undocumented function :func:`!ctypes.SetPointerType`, + which has been deprecated since Python 3.13. + (Contributed by Bénédikt Tran in :gh:`133866`.) + + http.server ----------- diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index 823a3692fd1bbf..d6d07a13f756e2 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -379,12 +379,6 @@ def create_unicode_buffer(init, size=None): return buf raise TypeError(init) - -def SetPointerType(pointer, cls): - import warnings - warnings._deprecated("ctypes.SetPointerType", remove=(3, 15)) - pointer.set_type(cls) - def ARRAY(typ, len): return typ * len diff --git a/Lib/test/test_ctypes/test_incomplete.py b/Lib/test/test_ctypes/test_incomplete.py index fefdfe9102e668..2f344611995b2c 100644 --- a/Lib/test/test_ctypes/test_incomplete.py +++ b/Lib/test/test_ctypes/test_incomplete.py @@ -21,9 +21,7 @@ class cell(Structure): _fields_ = [("name", c_char_p), ("next", lpcell)] - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - ctypes.SetPointerType(lpcell, cell) + lpcell.set_type(cell) self.assertIs(POINTER(cell), lpcell) @@ -50,10 +48,9 @@ class cell(Structure): _fields_ = [("name", c_char_p), ("next", lpcell)] - with self.assertWarns(DeprecationWarning): - ctypes.SetPointerType(lpcell, cell) - + lpcell.set_type(cell) self.assertIs(POINTER(cell), lpcell) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-05-11-10-01-48.gh-issue-133866.g3dHP_.rst b/Misc/NEWS.d/next/Library/2025-05-11-10-01-48.gh-issue-133866.g3dHP_.rst new file mode 100644 index 00000000000000..00f13c9a305eb5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-11-10-01-48.gh-issue-133866.g3dHP_.rst @@ -0,0 +1,3 @@ +Remove the undocumented function :func:`!ctypes.SetPointerType`, +which has been deprecated since Python 3.13. +Patch by Bénédikt Tran. From 4109a9c6b33faa0032ffc95d96cd0db482af3ce2 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 29 May 2025 15:36:59 +0200 Subject: [PATCH 405/989] gh-133260: Remove claim that PyUnicode_InternFromString immortalizes (GH-134213) --- Doc/c-api/unicode.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index cdd90d05b70b36..684432da81c61f 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1726,10 +1726,6 @@ They all return ``NULL`` or ``-1`` if an exception occurs. from user input, prefer calling :c:func:`PyUnicode_FromString` and :c:func:`PyUnicode_InternInPlace` directly. - .. impl-detail:: - - Strings interned this way are made :term:`immortal`. - .. c:function:: unsigned int PyUnicode_CHECK_INTERNED(PyObject *str) From f49a07b531543dd8a42d90f5b1c89c0312fbf806 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 29 May 2025 16:54:30 +0200 Subject: [PATCH 406/989] gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace most PyUnicodeWriter_WriteUTF8() calls with PyUnicodeWriter_WriteASCII(). Unrelated change to please the linter: remove an unused import in test_ctypes. Co-authored-by: Peter Bierma Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/c-api/unicode.rst | 17 +++++++++++++- Doc/whatsnew/3.15.rst | 7 ++++++ Include/cpython/unicodeobject.h | 4 ++++ Lib/test/test_capi/test_unicode.py | 7 ++++++ Lib/test/test_ctypes/test_incomplete.py | 1 - ...-05-13-16-06-46.gh-issue-133968.6alWst.rst | 4 ++++ Modules/_json.c | 10 ++++----- Modules/_ssl.c | 2 +- Modules/_testcapi/unicode.c | 22 +++++++++++++++++++ Objects/genericaliasobject.c | 6 ++--- Objects/typevarobject.c | 4 ++-- Objects/unicodeobject.c | 14 ++++++++++++ Objects/unionobject.c | 8 +++---- Parser/asdl_c.py | 6 ++--- Python/Python-ast.c | 6 ++--- Python/context.c | 10 ++++----- Python/hamt.c | 6 ++--- 17 files changed, 103 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 684432da81c61f..ef180464ef1688 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1802,9 +1802,24 @@ object. See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`. +.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size) + + Write the ASCII string *str* into *writer*. + + *size* is the string length in bytes. If *size* is equal to ``-1``, call + ``strlen(str)`` to get the string length. + + *str* must only contain ASCII characters. The behavior is undefined if + *str* contains non-ASCII characters. + + On success, return ``0``. + On error, set an exception, leave the writer unchanged, and return ``-1``. + + .. versionadded:: next + .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) - Writer the wide string *str* into *writer*. + Write the wide string *str* into *writer*. *size* is a number of wide characters. If *size* is equal to ``-1``, call ``wcslen(str)`` to get the string length. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index ced9c63071a53c..6d1f653f086a15 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -226,6 +226,13 @@ New features functions as replacements for :c:func:`PySys_GetObject`. (Contributed by Serhiy Storchaka in :gh:`108512`.) +* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string + into a :c:type:`PyUnicodeWriter`. The function is faster than + :c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the + input string contains non-ASCII characters. + (Contributed by Victor Stinner in :gh:`133968`.) + + Porting to Python 3.15 ---------------------- diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 136f5d5c5f8425..3d0414f5291fe4 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8( PyUnicodeWriter *writer, const char *str, Py_ssize_t size); +PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII( + PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size); PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar( PyUnicodeWriter *writer, const wchar_t *str, diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 3408c10f426058..c8be4f3faa9483 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1776,6 +1776,13 @@ def test_utf8(self): self.assertEqual(writer.finish(), "ascii-latin1=\xE9-euro=\u20AC.") + def test_ascii(self): + writer = self.create_writer(0) + writer.write_ascii(b"Hello ", -1) + writer.write_ascii(b"", 0) + writer.write_ascii(b"Python! ", 6) + self.assertEqual(writer.finish(), "Hello Python") + def test_invalid_utf8(self): writer = self.create_writer(0) with self.assertRaises(UnicodeDecodeError): diff --git a/Lib/test/test_ctypes/test_incomplete.py b/Lib/test/test_ctypes/test_incomplete.py index 2f344611995b2c..3189fcd1bd1330 100644 --- a/Lib/test/test_ctypes/test_incomplete.py +++ b/Lib/test/test_ctypes/test_incomplete.py @@ -1,6 +1,5 @@ import ctypes import unittest -import warnings from ctypes import Structure, POINTER, pointer, c_char_p # String-based "incomplete pointers" were implemented in ctypes 0.6.3 (2003, when diff --git a/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst new file mode 100644 index 00000000000000..47d5a3bda39942 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string +into a :c:type:`PyUnicodeWriter`. The function is faster than +:c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the +input string contains non-ASCII characters. Patch by Victor Stinner. diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..4aa6ae650651b3 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return PyUnicodeWriter_WriteUTF8(writer, "null", 4); + return PyUnicodeWriter_WriteASCII(writer, "null", 4); } else if (obj == Py_True) { - return PyUnicodeWriter_WriteUTF8(writer, "true", 4); + return PyUnicodeWriter_WriteASCII(writer, "true", 4); } else if (obj == Py_False) { - return PyUnicodeWriter_WriteUTF8(writer, "false", 5); + return PyUnicodeWriter_WriteASCII(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1649,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + return PyUnicodeWriter_WriteASCII(writer, "{}", 2); } if (s->markers != Py_None) { @@ -1753,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); + return PyUnicodeWriter_WriteASCII(writer, "[]", 2); } if (s->markers != Py_None) { diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 976da1340ecf1e..014e624f6c2f00 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state, goto fail; } } - if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) { goto fail; } } diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index b8ecf53f4f8b9c..e70f5c68bc3b69 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args) } +static PyObject* +writer_write_ascii(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_write_widechar(PyObject *self_raw, PyObject *args) { @@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) static PyMethodDef writer_methods[] = { {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS}, {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS}, + {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS}, {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS}, {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS}, {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS}, diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index ec3d01f00a3c3c..07b57f0c552ce9 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p) for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { return -1; } } @@ -109,7 +109,7 @@ ga_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -126,7 +126,7 @@ ga_repr(PyObject *self) } if (len == 0) { // for something like tuple[()] we should print a "()" - if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) { goto error; } } diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 6c199a52aa0ae6..cead6e69af5451 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs) for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) { PyObject *item = PyTuple_GET_ITEM(value, i); if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { PyUnicodeWriter_Discard(writer); return NULL; } @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p) } if (p == (PyObject *)&_PyNone_Type) { - return PyUnicodeWriter_WriteUTF8(writer, "None", 4); + return PyUnicodeWriter_WriteASCII(writer, "None", 4); } if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 && diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aa94fb91e65fc3..5611f839627a2e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14083,6 +14083,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 66435924b6c6c3..00ca5b9bf80341 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -290,7 +290,7 @@ union_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { - if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) { + if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) { goto error; } PyObject *p = PyTuple_GET_ITEM(alias->args, i); @@ -300,12 +300,12 @@ union_repr(PyObject *self) } #if 0 - PyUnicodeWriter_WriteUTF8(writer, "|args=", 6); + PyUnicodeWriter_WriteASCII(writer, "|args=", 6); PyUnicodeWriter_WriteRepr(writer, alias->args); - PyUnicodeWriter_WriteUTF8(writer, "|h=", 3); + PyUnicodeWriter_WriteASCII(writer, "|h=", 3); PyUnicodeWriter_WriteRepr(writer, alias->hashable_args); if (alias->unhashable_args) { - PyUnicodeWriter_WriteUTF8(writer, "|u=", 3); + PyUnicodeWriter_WriteASCII(writer, "|u=", 3); PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args); } #endif diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 22dcfe1b0d99bf..dba20226c3283a 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1512,7 +1512,7 @@ def visitModule(self, mod): for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -1536,7 +1536,7 @@ def visitModule(self, mod): } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -1640,7 +1640,7 @@ def visitModule(self, mod): } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/Python-ast.c b/Python/Python-ast.c index f7625ab1205bdc..660bc598a4862c 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5796,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth) for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -5820,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth) } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -5924,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth) } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/context.c b/Python/context.c index dceaae9b42979d..9927cab915cae7 100644 --- a/Python/context.c +++ b/Python/context.c @@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op) return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "tok_used) { - if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) { goto error; } } - if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { diff --git a/Python/hamt.c b/Python/hamt.c index f9bbf63961d8de..906149cc6cdbdc 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } if (key_or_null == NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) { goto error; } @@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } @@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } From d96343679fd6137c9d87d1bb120228b162ea0f8c Mon Sep 17 00:00:00 2001 From: Collin Funk Date: Thu, 29 May 2025 10:35:53 -0700 Subject: [PATCH 407/989] gh-134771: Fix time_clockid_converter() on Cygwin (#134772) Use long for clockid_t instead of int. --- .../next/Library/2025-05-26-22-18-32.gh-issue-134771.RKXpLT.rst | 2 ++ Modules/timemodule.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-26-22-18-32.gh-issue-134771.RKXpLT.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-26-22-18-32.gh-issue-134771.RKXpLT.rst b/Misc/NEWS.d/next/Library/2025-05-26-22-18-32.gh-issue-134771.RKXpLT.rst new file mode 100644 index 00000000000000..4b70c6ef398793 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-26-22-18-32.gh-issue-134771.RKXpLT.rst @@ -0,0 +1,2 @@ +The ``time_clockid_converter()`` function now selects correct type for +``clockid_t`` on Cygwin which fixes a build error. diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 1bfbf3f6a0b991..3271d87ddc27f2 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -187,7 +187,7 @@ time_clockid_converter(PyObject *obj, clockid_t *p) { #ifdef _AIX long long clk_id = PyLong_AsLongLong(obj); -#elif defined(__DragonFly__) +#elif defined(__DragonFly__) || defined(__CYGWIN__) long clk_id = PyLong_AsLong(obj); #else int clk_id = PyLong_AsInt(obj); From dafd14146f7ca18932894ea445a2f9f98f2a8b01 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 29 May 2025 14:13:12 -0600 Subject: [PATCH 408/989] gh-132775: Fix _PyFunctIon_VerifyStateless() (#134900) The problem we're fixing here is that we were using PyDict_Size() on "defaults", which it is actually a tuple. We're also adding some explicit type checks. This is a follow-up to gh-133221/gh-133528. --- Objects/funcobject.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 27214a129c2fb8..f87b0e5d8f1e47 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1264,26 +1264,32 @@ _PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) } // Disallow __defaults__. PyObject *defaults = PyFunction_GET_DEFAULTS(func); - if (defaults != NULL && defaults != Py_None && PyDict_Size(defaults) > 0) - { - _PyErr_SetString(tstate, PyExc_ValueError, "defaults not supported"); - return -1; + if (defaults != NULL) { + assert(PyTuple_Check(defaults)); // per PyFunction_New() + if (PyTuple_GET_SIZE(defaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "defaults not supported"); + return -1; + } } // Disallow __kwdefaults__. PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func); - if (kwdefaults != NULL && kwdefaults != Py_None - && PyDict_Size(kwdefaults) > 0) - { - _PyErr_SetString(tstate, PyExc_ValueError, - "keyword defaults not supported"); - return -1; + if (kwdefaults != NULL) { + assert(PyDict_Check(kwdefaults)); // per PyFunction_New() + if (PyDict_Size(kwdefaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "keyword defaults not supported"); + return -1; + } } // Disallow __closure__. PyObject *closure = PyFunction_GET_CLOSURE(func); - if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0) - { - _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); - return -1; + if (closure != NULL) { + assert(PyTuple_Check(closure)); // per PyFunction_New() + if (PyTuple_GET_SIZE(closure) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); + return -1; + } } // Check the code. PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); From cb8a72b301f47e76d93a7fe5b259e9a5758792e1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 30 May 2025 00:32:44 +0300 Subject: [PATCH 409/989] gh-134857: Improve error report for doctests run with unittest (GH-134858) Remove doctest module frames from tracebacks and redundant newline character from a failure message. --- Lib/doctest.py | 24 ++-- Lib/test/test_doctest/test_doctest.py | 130 +----------------- ...-05-28-20-49-29.gh-issue-134857.dVYXVO.rst | 3 + 3 files changed, 22 insertions(+), 135 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-28-20-49-29.gh-issue-134857.dVYXVO.rst diff --git a/Lib/doctest.py b/Lib/doctest.py index 2acb6cb79f394d..dec10a345165da 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -108,6 +108,8 @@ def _test(): from _colorize import ANSIColors, can_colorize +__unittest = True + class TestResults(namedtuple('TestResults', 'failed attempted')): def __new__(cls, failed, attempted, *, skipped=0): results = super().__new__(cls, failed, attempted) @@ -1395,11 +1397,11 @@ def __run(self, test, compileflags, out): exec(compile(example.source, filename, "single", compileflags, True), test.globs) self.debugger.set_continue() # ==== Example Finished ==== - exception = None + exc_info = None except KeyboardInterrupt: raise - except: - exception = sys.exc_info() + except BaseException as exc: + exc_info = type(exc), exc, exc.__traceback__.tb_next self.debugger.set_continue() # ==== Example Finished ==== got = self._fakeout.getvalue() # the actual output @@ -1408,21 +1410,21 @@ def __run(self, test, compileflags, out): # If the example executed without raising any exceptions, # verify its output. - if exception is None: + if exc_info is None: if check(example.want, got, self.optionflags): outcome = SUCCESS # The example raised an exception: check if it was expected. else: - formatted_ex = traceback.format_exception_only(*exception[:2]) - if issubclass(exception[0], SyntaxError): + formatted_ex = traceback.format_exception_only(*exc_info[:2]) + if issubclass(exc_info[0], SyntaxError): # SyntaxError / IndentationError is special: # we don't care about the carets / suggestions / etc # We only care about the error message and notes. # They start with `SyntaxError:` (or any other class name) exception_line_prefixes = ( - f"{exception[0].__qualname__}:", - f"{exception[0].__module__}.{exception[0].__qualname__}:", + f"{exc_info[0].__qualname__}:", + f"{exc_info[0].__module__}.{exc_info[0].__qualname__}:", ) exc_msg_index = next( index @@ -1433,7 +1435,7 @@ def __run(self, test, compileflags, out): exc_msg = "".join(formatted_ex) if not quiet: - got += _exception_traceback(exception) + got += _exception_traceback(exc_info) # If `example.exc_msg` is None, then we weren't expecting # an exception. @@ -1462,7 +1464,7 @@ def __run(self, test, compileflags, out): elif outcome is BOOM: if not quiet: self.report_unexpected_exception(out, test, example, - exception) + exc_info) failures += 1 else: assert False, ("unknown outcome", outcome) @@ -2324,7 +2326,7 @@ def runTest(self): sys.stdout = old if results.failed: - raise self.failureException(self.format_failure(new.getvalue())) + raise self.failureException(self.format_failure(new.getvalue().rstrip('\n'))) def format_failure(self, err): test = self._dt_test diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index c5b247797c321d..2bfaa6c599cd47 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -2411,9 +2411,6 @@ def test_DocTestSuite_errors(): >>> result >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - Traceback (most recent call last): - File ... - raise self.failureException(self.format_failure(new.getvalue())) AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors File "...sample_doctest_errors.py", line 0, in sample_doctest_errors @@ -2431,21 +2428,12 @@ def test_DocTestSuite_errors(): 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ ZeroDivisionError: division by zero - >>> print(result.failures[1][1]) # doctest: +ELLIPSIS - Traceback (most recent call last): - File ... - raise self.failureException(self.format_failure(new.getvalue())) AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.__test__.bad File "...sample_doctest_errors.py", line unknown line number, in bad @@ -2463,21 +2451,12 @@ def test_DocTestSuite_errors(): 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ ZeroDivisionError: division by zero - >>> print(result.failures[2][1]) # doctest: +ELLIPSIS - Traceback (most recent call last): - File ... - raise self.failureException(self.format_failure(new.getvalue())) AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.errors File "...sample_doctest_errors.py", line 14, in errors @@ -2495,11 +2474,6 @@ def test_DocTestSuite_errors(): 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -2510,11 +2484,6 @@ def test_DocTestSuite_errors(): f() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in f() ~^^ @@ -2528,11 +2497,6 @@ def test_DocTestSuite_errors(): g() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in g() ~^^ @@ -2541,11 +2505,7 @@ def test_DocTestSuite_errors(): ~~^^^ IndexError: list index out of range - >>> print(result.failures[3][1]) # doctest: +ELLIPSIS - Traceback (most recent call last): - File ... - raise self.failureException(self.format_failure(new.getvalue())) AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.syntax_error File "...sample_doctest_errors.py", line 29, in syntax_error @@ -2554,18 +2514,11 @@ def test_DocTestSuite_errors(): Failed example: 2+*3 Exception raised: - Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^ File "", line 1 2+*3 ^ SyntaxError: invalid syntax - """ def test_DocFileSuite(): @@ -2740,9 +2693,6 @@ def test_DocFileSuite_errors(): >>> result >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - Traceback (most recent call last): - File ... - raise self.failureException(self.format_failure(new.getvalue())) AssertionError: Failed doctest test for test_doctest_errors.txt File "...test_doctest_errors.txt", line 0 @@ -2760,11 +2710,6 @@ def test_DocFileSuite_errors(): 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -2775,11 +2720,6 @@ def test_DocFileSuite_errors(): f() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in f() ~^^ @@ -2792,18 +2732,11 @@ def test_DocFileSuite_errors(): Failed example: 2+*3 Exception raised: - Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^ File "", line 1 2+*3 ^ SyntaxError: invalid syntax - """ def test_trailing_space_in_test(): @@ -2876,7 +2809,8 @@ def test_unittest_reportflags(): >>> result >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - Traceback ... + AssertionError: Failed doctest test for test_doctest.txt + ... Failed example: favorite_color ... @@ -2895,14 +2829,14 @@ def test_unittest_reportflags(): >>> result >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - Traceback ... + AssertionError: Failed doctest test for test_doctest.txt + ... Failed example: favorite_color Exception raised: ... NameError: name 'favorite_color' is not defined - We get only the first failure. @@ -2922,7 +2856,8 @@ def test_unittest_reportflags(): the trailing whitespace using `\x20` in the diff below. >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - Traceback ... + AssertionError: Failed doctest test for test_doctest.txt + ... Failed example: favorite_color ... @@ -2937,7 +2872,6 @@ def test_unittest_reportflags(): +\x20 b - Test runners can restore the formatting flags after they run: @@ -3145,11 +3079,6 @@ def test_testfile_errors(): r""" 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -3160,11 +3089,6 @@ def test_testfile_errors(): r""" f() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in f() ~^^ @@ -3177,12 +3101,6 @@ def test_testfile_errors(): r""" Failed example: 2+*3 Exception raised: - Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^ File "", line 1 2+*3 ^ @@ -3343,11 +3261,6 @@ def test_testmod_errors(): r""" 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -3366,11 +3279,6 @@ def test_testmod_errors(): r""" 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -3389,11 +3297,6 @@ def test_testmod_errors(): r""" 1/0 Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in 1/0 ~^~ @@ -3404,11 +3307,6 @@ def test_testmod_errors(): r""" f() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in f() ~^^ @@ -3422,11 +3320,6 @@ def test_testmod_errors(): r""" g() Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in g() ~^^ @@ -3439,12 +3332,6 @@ def test_testmod_errors(): r""" Failed example: 2+*3 Exception raised: - Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^ File "", line 1 2+*3 ^ @@ -3490,11 +3377,6 @@ def test_unicode(): """ raise Exception('clé') Exception raised: Traceback (most recent call last): - File ... - exec(compile(example.source, filename, "single", - ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - compileflags, True), test.globs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "", line 1, in raise Exception('clé') Exception: clé diff --git a/Misc/NEWS.d/next/Library/2025-05-28-20-49-29.gh-issue-134857.dVYXVO.rst b/Misc/NEWS.d/next/Library/2025-05-28-20-49-29.gh-issue-134857.dVYXVO.rst new file mode 100644 index 00000000000000..92e38c0bb5ac87 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-28-20-49-29.gh-issue-134857.dVYXVO.rst @@ -0,0 +1,3 @@ +Improve error report for :mod:`doctest`\ s run with :mod:`unittest`. Remove +:mod:`!doctest` module frames from tracebacks and redundant newline +character from a failure message. From a4251411a97304ab001721c6231d86ddf4eac3f0 Mon Sep 17 00:00:00 2001 From: Savannah Bailey Date: Thu, 29 May 2025 20:34:34 -0700 Subject: [PATCH 410/989] GH-106235: Clarify `parse_known_args` documentation by removing "remaining" (#126921) --- Doc/library/argparse.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 29396c7a0366a1..17f126cc065a82 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -2122,12 +2122,15 @@ Partial parsing .. method:: ArgumentParser.parse_known_args(args=None, namespace=None) - Sometimes a script may only parse a few of the command-line arguments, passing - the remaining arguments on to another script or program. In these cases, the - :meth:`~ArgumentParser.parse_known_args` method can be useful. It works much like - :meth:`~ArgumentParser.parse_args` except that it does not produce an error when - extra arguments are present. Instead, it returns a two item tuple containing - the populated namespace and the list of remaining argument strings. + Sometimes a script only needs to handle a specific set of command-line + arguments, leaving any unrecognized arguments for another script or program. + In these cases, the :meth:`~ArgumentParser.parse_known_args` method can be + useful. + + This method works similarly to :meth:`~ArgumentParser.parse_args`, but it does + not raise an error for extra, unrecognized arguments. Instead, it parses the + known arguments and returns a two item tuple that contains the populated + namespace and the list of any unrecognized arguments. :: From 381020d41fb1f8b33421f01c609ba0d0edb99764 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Thu, 29 May 2025 21:11:20 -0700 Subject: [PATCH 411/989] ast docs: Fix description of ast.Constant (#134741) Contrary to the current docs, ast.Constant will never hold containers such as frozenset or tuple; the Python parser only emits it for simple literals. For precision, add the exact list of types that may be contained in an ast.Constant. --- Doc/library/ast.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ca9a6b0712c9a2..cf22250cac6091 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -268,9 +268,9 @@ Literals .. class:: Constant(value) A constant value. The ``value`` attribute of the ``Constant`` literal contains the - Python object it represents. The values represented can be simple types - such as a number, string or ``None``, but also immutable container types - (tuples and frozensets) if all of their elements are constant. + Python object it represents. The values represented can be instances of :class:`str`, + :class:`bytes`, :class:`int`, :class:`float`, :class:`complex`, and :class:`bool`, + and the constants :data:`None` and :data:`Ellipsis`. .. doctest:: From 5f60d0fcccbf6676f5bc924f05452bd5321446f0 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Thu, 29 May 2025 21:37:43 -0700 Subject: [PATCH 412/989] gh-134906: Document CompressionParameter.content_size_flag (#134907) * Document CompressionParameter.content_size_flag --- Doc/library/compression.zstd.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst index 1e1802155a19ec..35bcbc2bfd8eac 100644 --- a/Doc/library/compression.zstd.rst +++ b/Doc/library/compression.zstd.rst @@ -615,6 +615,24 @@ Advanced parameter control A value of zero causes the value to be selected automatically. + .. attribute:: content_size_flag + + Write the size of the data to be compressed into the Zstandard frame + header when known prior to compressing. + + This flag only takes effect under the following two scenarios: + + * Calling :func:`compress` for one-shot compression + * Providing all of the data to be compressed in the frame in a single + :meth:`ZstdCompressor.compress` call, with the + :attr:`ZstdCompressor.FLUSH_FRAME` mode. + + All other compression calls may not write the size information into the + frame header. + + ``True`` or ``1`` enable the content size flag while ``False`` or ``0`` + disable it. + .. attribute:: checksum_flag A four-byte checksum using XXHash64 of the uncompressed content is From 2f2bee21118adce653ee5bc4eb31d30327465966 Mon Sep 17 00:00:00 2001 From: Sam James Date: Fri, 30 May 2025 05:42:19 +0100 Subject: [PATCH 413/989] gh-134768: Fix definition of `mt_continue_should_break()` (#134769) In 121ed71f4e395948d313249b2ad33e1e21581f8a, mt_continue_should_break was changed to be guarded by `Py_DEBUG`, but it's used in `compress_mt_continue_lock_held` with just `assert`, so it needs to be available when `NDEBUG` is undefined too. `Py_DEBUG` implies `NDEBUG` is undefined, so we can check just that. Fixes: 121ed71f4e395948d313249b2ad33e1e21581f8a --- Modules/_zstd/compressor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 0fc3d7d36c68fe..8ff2a3aadc1cd6 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -512,7 +512,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data, return NULL; } -#ifdef Py_DEBUG +#ifndef NDEBUG static inline int mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { From b367e27af9b52528e395f95b277ec7b69e98e287 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 May 2025 17:59:23 +0900 Subject: [PATCH 414/989] Doc: remove unnecessary section header (GH-134917) --- Doc/c-api/unicode.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index ef180464ef1688..45f50ba5f97d26 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1461,10 +1461,6 @@ the user settings on the machine running the codec. .. versionadded:: 3.3 -Methods & Slots -""""""""""""""" - - .. _unicodemethodsandslots: Methods and Slot Functions From 45c6c48afc13f9897010e32171a3e02d0624258c Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 30 May 2025 02:30:05 -0700 Subject: [PATCH 415/989] gh-134885: zstd: Use Py_XSETREF (GH-134886) --- .../2025-05-29-06-53-40.gh-issue-134885.-_L22o.rst | 2 ++ Modules/_zstd/_zstdmodule.c | 11 ++++------- 2 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-29-06-53-40.gh-issue-134885.-_L22o.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-29-06-53-40.gh-issue-134885.-_L22o.rst b/Misc/NEWS.d/next/Library/2025-05-29-06-53-40.gh-issue-134885.-_L22o.rst new file mode 100644 index 00000000000000..4b05d42c109d06 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-29-06-53-40.gh-issue-134885.-_L22o.rst @@ -0,0 +1,2 @@ +Fix possible crash in the :mod:`compression.zstd` module related to setting +parameter types. Patch by Jelle Zijlstra. diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 5ad697d2b83dd6..986b3579479f0f 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -514,13 +514,10 @@ _zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, return NULL; } - Py_XDECREF(mod_state->CParameter_type); - Py_INCREF(c_parameter_type); - mod_state->CParameter_type = (PyTypeObject*)c_parameter_type; - - Py_XDECREF(mod_state->DParameter_type); - Py_INCREF(d_parameter_type); - mod_state->DParameter_type = (PyTypeObject*)d_parameter_type; + Py_XSETREF( + mod_state->CParameter_type, (PyTypeObject*)Py_NewRef(c_parameter_type)); + Py_XSETREF( + mod_state->DParameter_type, (PyTypeObject*)Py_NewRef(d_parameter_type)); Py_RETURN_NONE; } From ebf6d13567287d04683dab36f52cde7a3c9915e7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 30 May 2025 12:15:47 +0200 Subject: [PATCH 416/989] gh-134745: Change PyThread_allocate_lock() implementation to PyMutex (#134747) Co-authored-by: Sam Gross --- Include/internal/pycore_lock.h | 3 + Lib/test/test_sys.py | 2 +- ...-05-30-11-33-17.gh-issue-134745.GN-zk2.rst | 3 + Python/lock.c | 3 + Python/thread.c | 82 +++- Python/thread_nt.h | 92 ---- Python/thread_pthread.h | 392 ------------------ 7 files changed, 86 insertions(+), 491 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-05-30-11-33-17.gh-issue-134745.GN-zk2.rst diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 7484b05d7f2446..32b60cc33a21f1 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -48,6 +48,9 @@ typedef enum _PyLockFlags { // Handle signals if interrupted while waiting on the lock. _PY_LOCK_HANDLE_SIGNALS = 2, + + // Fail if interrupted by a signal while waiting on the lock. + _PY_FAIL_IF_INTERRUPTED = 4, } _PyLockFlags; // Lock a mutex with an optional timeout and additional options. See diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 795d1ecbb59f8f..65d15610ed1505 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -729,7 +729,7 @@ def test_thread_info(self): info = sys.thread_info self.assertEqual(len(info), 3) self.assertIn(info.name, ('nt', 'pthread', 'pthread-stubs', 'solaris', None)) - self.assertIn(info.lock, ('semaphore', 'mutex+cond', None)) + self.assertIn(info.lock, ('pymutex', None)) if sys.platform.startswith(("linux", "android", "freebsd")): self.assertEqual(info.name, "pthread") elif sys.platform == "win32": diff --git a/Misc/NEWS.d/next/C_API/2025-05-30-11-33-17.gh-issue-134745.GN-zk2.rst b/Misc/NEWS.d/next/C_API/2025-05-30-11-33-17.gh-issue-134745.GN-zk2.rst new file mode 100644 index 00000000000000..a85d2e90576a49 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-30-11-33-17.gh-issue-134745.GN-zk2.rst @@ -0,0 +1,3 @@ +Change :c:func:`!PyThread_allocate_lock` implementation to ``PyMutex``. +On Windows, :c:func:`!PyThread_acquire_lock_timed` now supports the *intr_flag* +parameter: it can be interrupted. Patch by Victor Stinner. diff --git a/Python/lock.c b/Python/lock.c index 28a12ad18352d1..b125ad0c9e356d 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -119,6 +119,9 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_INTR; } } + else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) { + return PY_LOCK_INTR; + } else if (ret == Py_PARK_TIMEOUT) { assert(timeout >= 0); return PY_LOCK_FAILURE; diff --git a/Python/thread.c b/Python/thread.c index 4ff5f11a34852b..18c4af7f634c75 100644 --- a/Python/thread.c +++ b/Python/thread.c @@ -39,7 +39,8 @@ const long long PY_TIMEOUT_MAX = PY_TIMEOUT_MAX_VALUE; -static void PyThread__init_thread(void); /* Forward */ +/* Forward declaration */ +static void PyThread__init_thread(void); #define initialized _PyRuntime.threads.initialized @@ -71,6 +72,79 @@ PyThread_init_thread(void) #endif +/* + * Lock support. + */ + +PyThread_type_lock +PyThread_allocate_lock(void) +{ + if (!initialized) { + PyThread_init_thread(); + } + + PyMutex *lock = (PyMutex *)PyMem_RawMalloc(sizeof(PyMutex)); + if (lock) { + *lock = (PyMutex){0}; + } + + return (PyThread_type_lock)lock; +} + +void +PyThread_free_lock(PyThread_type_lock lock) +{ + PyMem_RawFree(lock); +} + +PyLockStatus +PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds, + int intr_flag) +{ + PyTime_t timeout; // relative timeout + if (microseconds >= 0) { + // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout + // overflow to the caller, so clamp the timeout to + // [PyTime_MIN, PyTime_MAX]. + // + // PyTime_MAX nanoseconds is around 292.3 years. + // + // _thread.Lock.acquire() and _thread.RLock.acquire() raise an + // OverflowError if microseconds is greater than PY_TIMEOUT_MAX. + timeout = _PyTime_FromMicrosecondsClamp(microseconds); + } + else { + timeout = -1; + } + + _PyLockFlags flags = _Py_LOCK_DONT_DETACH; + if (intr_flag) { + flags |= _PY_FAIL_IF_INTERRUPTED; + } + + return _PyMutex_LockTimed((PyMutex *)lock, timeout, flags); +} + +void +PyThread_release_lock(PyThread_type_lock lock) +{ + PyMutex_Unlock((PyMutex *)lock); +} + +int +_PyThread_at_fork_reinit(PyThread_type_lock *lock) +{ + _PyMutex_at_fork_reinit((PyMutex *)lock); + return 0; +} + +int +PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) +{ + return PyThread_acquire_lock_timed(lock, waitflag ? -1 : 0, /*intr_flag=*/0); +} + + /* return the current thread stack size */ size_t PyThread_get_stacksize(void) @@ -261,11 +335,7 @@ PyThread_GetInfo(void) #ifdef HAVE_PTHREAD_STUBS value = Py_NewRef(Py_None); #elif defined(_POSIX_THREADS) -#ifdef USE_SEMAPHORES - value = PyUnicode_FromString("semaphore"); -#else - value = PyUnicode_FromString("mutex+cond"); -#endif + value = PyUnicode_FromString("pymutex"); if (value == NULL) { Py_DECREF(threadinfo); return NULL; diff --git a/Python/thread_nt.h b/Python/thread_nt.h index e078b98be3cdf4..9a29d14ef67678 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -300,98 +300,6 @@ PyThread_hang_thread(void) } } -/* - * Lock support. It has to be implemented as semaphores. - * I [Dag] tried to implement it with mutex but I could find a way to - * tell whether a thread already own the lock or not. - */ -PyThread_type_lock -PyThread_allocate_lock(void) -{ - PNRMUTEX mutex; - - if (!initialized) - PyThread_init_thread(); - - mutex = AllocNonRecursiveMutex() ; - - PyThread_type_lock aLock = (PyThread_type_lock) mutex; - assert(aLock); - - return aLock; -} - -void -PyThread_free_lock(PyThread_type_lock aLock) -{ - FreeNonRecursiveMutex(aLock) ; -} - -// WaitForSingleObject() accepts timeout in milliseconds in the range -// [0; 0xFFFFFFFE] (DWORD type). INFINITE value (0xFFFFFFFF) means no -// timeout. 0xFFFFFFFE milliseconds is around 49.7 days. -const DWORD TIMEOUT_MS_MAX = 0xFFFFFFFE; - -/* - * Return 1 on success if the lock was acquired - * - * and 0 if the lock was not acquired. This means a 0 is returned - * if the lock has already been acquired by this thread! - */ -PyLockStatus -PyThread_acquire_lock_timed(PyThread_type_lock aLock, - PY_TIMEOUT_T microseconds, int intr_flag) -{ - assert(aLock); - - /* Fow now, intr_flag does nothing on Windows, and lock acquires are - * uninterruptible. */ - PyLockStatus success; - PY_TIMEOUT_T milliseconds; - - if (microseconds >= 0) { - milliseconds = microseconds / 1000; - // Round milliseconds away from zero - if (microseconds % 1000 > 0) { - milliseconds++; - } - if (milliseconds > (PY_TIMEOUT_T)TIMEOUT_MS_MAX) { - // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout - // overflow to the caller, so clamp the timeout to - // [0, TIMEOUT_MS_MAX] milliseconds. - // - // _thread.Lock.acquire() and _thread.RLock.acquire() raise an - // OverflowError if microseconds is greater than PY_TIMEOUT_MAX. - milliseconds = TIMEOUT_MS_MAX; - } - assert(milliseconds != INFINITE); - } - else { - milliseconds = INFINITE; - } - - if (EnterNonRecursiveMutex((PNRMUTEX)aLock, - (DWORD)milliseconds) == WAIT_OBJECT_0) { - success = PY_LOCK_ACQUIRED; - } - else { - success = PY_LOCK_FAILURE; - } - - return success; -} -int -PyThread_acquire_lock(PyThread_type_lock aLock, int waitflag) -{ - return PyThread_acquire_lock_timed(aLock, waitflag ? -1 : 0, 0); -} - -void -PyThread_release_lock(PyThread_type_lock aLock) -{ - assert(aLock); - (void)LeaveNonRecursiveMutex((PNRMUTEX) aLock); -} /* minimum/maximum thread stack sizes supported */ #define THREAD_MIN_STACKSIZE 0x8000 /* 32 KiB */ diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index da4058242448f3..13992f95723866 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -99,16 +99,6 @@ #undef HAVE_SEM_CLOCKWAIT #endif -/* Whether or not to use semaphores directly rather than emulating them with - * mutexes and condition variables: - */ -#if (defined(_POSIX_SEMAPHORES) && !defined(HAVE_BROKEN_POSIX_SEMAPHORES) && \ - (defined(HAVE_SEM_TIMEDWAIT) || defined(HAVE_SEM_CLOCKWAIT))) -# define USE_SEMAPHORES -#else -# undef USE_SEMAPHORES -#endif - /* On platforms that don't use standard POSIX threads pthread_sigmask() * isn't present. DEC threads uses sigprocmask() instead as do most @@ -442,388 +432,6 @@ PyThread_hang_thread(void) } } -#ifdef USE_SEMAPHORES - -/* - * Lock support. - */ - -PyThread_type_lock -PyThread_allocate_lock(void) -{ - sem_t *lock; - int status, error = 0; - - if (!initialized) - PyThread_init_thread(); - - lock = (sem_t *)PyMem_RawMalloc(sizeof(sem_t)); - - if (lock) { - status = sem_init(lock,0,1); - CHECK_STATUS("sem_init"); - - if (error) { - PyMem_RawFree((void *)lock); - lock = NULL; - } - } - - return (PyThread_type_lock)lock; -} - -void -PyThread_free_lock(PyThread_type_lock lock) -{ - sem_t *thelock = (sem_t *)lock; - int status, error = 0; - - (void) error; /* silence unused-but-set-variable warning */ - - if (!thelock) - return; - - status = sem_destroy(thelock); - CHECK_STATUS("sem_destroy"); - - PyMem_RawFree((void *)thelock); -} - -/* - * As of February 2002, Cygwin thread implementations mistakenly report error - * codes in the return value of the sem_ calls (like the pthread_ functions). - * Correct implementations return -1 and put the code in errno. This supports - * either. - */ -static int -fix_status(int status) -{ - return (status == -1) ? errno : status; -} - -PyLockStatus -PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds, - int intr_flag) -{ - PyLockStatus success; - sem_t *thelock = (sem_t *)lock; - int status, error = 0; - - (void) error; /* silence unused-but-set-variable warning */ - - PyTime_t timeout; // relative timeout - if (microseconds >= 0) { - // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout - // overflow to the caller, so clamp the timeout to - // [PyTime_MIN, PyTime_MAX]. - // - // PyTime_MAX nanoseconds is around 292.3 years. - // - // _thread.Lock.acquire() and _thread.RLock.acquire() raise an - // OverflowError if microseconds is greater than PY_TIMEOUT_MAX. - timeout = _PyTime_FromMicrosecondsClamp(microseconds); - } - else { - timeout = -1; - } - -#ifdef HAVE_SEM_CLOCKWAIT - struct timespec abs_timeout; - // Local scope for deadline - { - PyTime_t now; - // silently ignore error: cannot report error to the caller - (void)PyTime_MonotonicRaw(&now); - PyTime_t deadline = _PyTime_Add(now, timeout); - _PyTime_AsTimespec_clamp(deadline, &abs_timeout); - } -#else - PyTime_t deadline = 0; - if (timeout > 0 && !intr_flag) { - deadline = _PyDeadline_Init(timeout); - } -#endif - - while (1) { - if (timeout > 0) { -#ifdef HAVE_SEM_CLOCKWAIT - status = fix_status(sem_clockwait(thelock, CLOCK_MONOTONIC, - &abs_timeout)); -#else - PyTime_t now; - // silently ignore error: cannot report error to the caller - (void)PyTime_TimeRaw(&now); - PyTime_t abs_time = _PyTime_Add(now, timeout); - - struct timespec ts; - _PyTime_AsTimespec_clamp(abs_time, &ts); - status = fix_status(sem_timedwait(thelock, &ts)); -#endif - } - else if (timeout == 0) { - status = fix_status(sem_trywait(thelock)); - } - else { - status = fix_status(sem_wait(thelock)); - } - - /* Retry if interrupted by a signal, unless the caller wants to be - notified. */ - if (intr_flag || status != EINTR) { - break; - } - - // sem_clockwait() uses an absolute timeout, there is no need - // to recompute the relative timeout. -#ifndef HAVE_SEM_CLOCKWAIT - if (timeout > 0) { - /* wait interrupted by a signal (EINTR): recompute the timeout */ - timeout = _PyDeadline_Get(deadline); - if (timeout < 0) { - status = ETIMEDOUT; - break; - } - } -#endif - } - - /* Don't check the status if we're stopping because of an interrupt. */ - if (!(intr_flag && status == EINTR)) { - if (timeout > 0) { - if (status != ETIMEDOUT) { -#ifdef HAVE_SEM_CLOCKWAIT - CHECK_STATUS("sem_clockwait"); -#else - CHECK_STATUS("sem_timedwait"); -#endif - } - } - else if (timeout == 0) { - if (status != EAGAIN) { - CHECK_STATUS("sem_trywait"); - } - } - else { - CHECK_STATUS("sem_wait"); - } - } - - if (status == 0) { - success = PY_LOCK_ACQUIRED; - } else if (intr_flag && status == EINTR) { - success = PY_LOCK_INTR; - } else { - success = PY_LOCK_FAILURE; - } - - return success; -} - -void -PyThread_release_lock(PyThread_type_lock lock) -{ - sem_t *thelock = (sem_t *)lock; - int status, error = 0; - - (void) error; /* silence unused-but-set-variable warning */ - - status = sem_post(thelock); - CHECK_STATUS("sem_post"); -} - -#else /* USE_SEMAPHORES */ - -/* - * Lock support. - */ -PyThread_type_lock -PyThread_allocate_lock(void) -{ - pthread_lock *lock; - int status, error = 0; - - if (!initialized) - PyThread_init_thread(); - - lock = (pthread_lock *) PyMem_RawCalloc(1, sizeof(pthread_lock)); - if (lock) { - lock->locked = 0; - - status = pthread_mutex_init(&lock->mut, NULL); - CHECK_STATUS_PTHREAD("pthread_mutex_init"); - /* Mark the pthread mutex underlying a Python mutex as - pure happens-before. We can't simply mark the - Python-level mutex as a mutex because it can be - acquired and released in different threads, which - will cause errors. */ - _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(&lock->mut); - - status = _PyThread_cond_init(&lock->lock_released); - CHECK_STATUS_PTHREAD("pthread_cond_init"); - - if (error) { - PyMem_RawFree((void *)lock); - lock = 0; - } - } - - return (PyThread_type_lock) lock; -} - -void -PyThread_free_lock(PyThread_type_lock lock) -{ - pthread_lock *thelock = (pthread_lock *)lock; - int status, error = 0; - - (void) error; /* silence unused-but-set-variable warning */ - - /* some pthread-like implementations tie the mutex to the cond - * and must have the cond destroyed first. - */ - status = pthread_cond_destroy( &thelock->lock_released ); - CHECK_STATUS_PTHREAD("pthread_cond_destroy"); - - status = pthread_mutex_destroy( &thelock->mut ); - CHECK_STATUS_PTHREAD("pthread_mutex_destroy"); - - PyMem_RawFree((void *)thelock); -} - -PyLockStatus -PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds, - int intr_flag) -{ - PyLockStatus success = PY_LOCK_FAILURE; - pthread_lock *thelock = (pthread_lock *)lock; - int status, error = 0; - - if (microseconds == 0) { - status = pthread_mutex_trylock( &thelock->mut ); - if (status != EBUSY) { - CHECK_STATUS_PTHREAD("pthread_mutex_trylock[1]"); - } - } - else { - status = pthread_mutex_lock( &thelock->mut ); - CHECK_STATUS_PTHREAD("pthread_mutex_lock[1]"); - } - if (status != 0) { - goto done; - } - - if (thelock->locked == 0) { - success = PY_LOCK_ACQUIRED; - goto unlock; - } - if (microseconds == 0) { - goto unlock; - } - - struct timespec abs_timeout; - if (microseconds > 0) { - _PyThread_cond_after(microseconds, &abs_timeout); - } - // Continue trying until we get the lock - - // mut must be locked by me -- part of the condition protocol - while (1) { - if (microseconds > 0) { - status = pthread_cond_timedwait(&thelock->lock_released, - &thelock->mut, &abs_timeout); - if (status == 1) { - break; - } - if (status == ETIMEDOUT) { - break; - } - CHECK_STATUS_PTHREAD("pthread_cond_timedwait"); - } - else { - status = pthread_cond_wait( - &thelock->lock_released, - &thelock->mut); - CHECK_STATUS_PTHREAD("pthread_cond_wait"); - } - - if (intr_flag && status == 0 && thelock->locked) { - // We were woken up, but didn't get the lock. We probably received - // a signal. Return PY_LOCK_INTR to allow the caller to handle - // it and retry. - success = PY_LOCK_INTR; - break; - } - - if (status == 0 && !thelock->locked) { - success = PY_LOCK_ACQUIRED; - break; - } - - // Wait got interrupted by a signal: retry - } - -unlock: - if (success == PY_LOCK_ACQUIRED) { - thelock->locked = 1; - } - status = pthread_mutex_unlock( &thelock->mut ); - CHECK_STATUS_PTHREAD("pthread_mutex_unlock[1]"); - -done: - if (error) { - success = PY_LOCK_FAILURE; - } - return success; -} - -void -PyThread_release_lock(PyThread_type_lock lock) -{ - pthread_lock *thelock = (pthread_lock *)lock; - int status, error = 0; - - (void) error; /* silence unused-but-set-variable warning */ - - status = pthread_mutex_lock( &thelock->mut ); - CHECK_STATUS_PTHREAD("pthread_mutex_lock[3]"); - - thelock->locked = 0; - - /* wake up someone (anyone, if any) waiting on the lock */ - status = pthread_cond_signal( &thelock->lock_released ); - CHECK_STATUS_PTHREAD("pthread_cond_signal"); - - status = pthread_mutex_unlock( &thelock->mut ); - CHECK_STATUS_PTHREAD("pthread_mutex_unlock[3]"); -} - -#endif /* USE_SEMAPHORES */ - -int -_PyThread_at_fork_reinit(PyThread_type_lock *lock) -{ - PyThread_type_lock new_lock = PyThread_allocate_lock(); - if (new_lock == NULL) { - return -1; - } - - /* bpo-6721, bpo-40089: The old lock can be in an inconsistent state. - fork() can be called in the middle of an operation on the lock done by - another thread. So don't call PyThread_free_lock(*lock). - - Leak memory on purpose. Don't release the memory either since the - address of a mutex is relevant. Putting two mutexes at the same address - can lead to problems. */ - - *lock = new_lock; - return 0; -} - -int -PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) -{ - return PyThread_acquire_lock_timed(lock, waitflag ? -1 : 0, /*intr_flag=*/0); -} /* set the thread stack size. * Return 0 if size is valid, -1 if size is invalid, From c600310663277e24607890298e6d9bf7e1d4f584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 May 2025 14:44:03 +0200 Subject: [PATCH 417/989] gh-134586: mark `_mi_assert_fail` as `noreturn`, `cold` and `throw` (#134624) We add the following attributes on `_mi_assert_fail` to help IDE introspection: * `__attribute__((__noreturn__))` * `__attribute__((cold))` * `__THROW` (GCC only) --- Include/internal/mimalloc/mimalloc/types.h | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 4f77bd7bc525db..a04169f7fb84e8 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -50,6 +50,32 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_cache_align #endif +#if (MI_DEBUG) +#if defined(_MSC_VER) +#define mi_decl_noreturn __declspec(noreturn) +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) +#define mi_decl_noreturn __attribute__((__noreturn__)) +#else +#define mi_decl_noreturn +#endif + +/* + * 'cold' attribute seems to have been fully supported since GCC 4.x. + * See https://github.com/gcc-mirror/gcc/commit/52bf96d2f299e9e6. + */ +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif + +#if (defined(__GNUC__) && defined(__THROW)) +#define mi_decl_throw __THROW +#else +#define mi_decl_throw +#endif +#endif + // ------------------------------------------------------ // Variants // ------------------------------------------------------ @@ -582,7 +608,8 @@ struct mi_heap_s { #if (MI_DEBUG) // use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); +mi_decl_noreturn mi_decl_cold mi_decl_throw +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func); #define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) #else #define mi_assert(x) From ce6a6371a23dc57ed4257eb102ebfb2827477abf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 30 May 2025 15:11:42 +0100 Subject: [PATCH 418/989] GH-134879: Fix `INSTRUMENT_FOR_ITER` for list/tuple (#134897) Fix INSTRUMENT_FOR_ITER for list/tuple --- Lib/test/test_monitoring.py | 15 +++++++++++++++ Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index 263e4e6f394155..a932ac80117d27 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -2157,6 +2157,21 @@ def callback(code, instruction_offset): sys.monitoring.restart_events() sys.monitoring.set_events(0, 0) + def test_134879(self): + # gh-134789 + # Specialized FOR_ITER not incrementing index + def foo(): + t = 0 + for i in [1,2,3,4]: + t += i + self.assertEqual(t, 10) + + sys.monitoring.use_tool_id(0, "test") + self.addCleanup(sys.monitoring.free_tool_id, 0) + sys.monitoring.set_local_events(0, foo.__code__, E.BRANCH_LEFT | E.BRANCH_RIGHT) + foo() + sys.monitoring.set_local_events(0, foo.__code__, 0) + class TestOptimizer(MonitoringTestBase, unittest.TestCase): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a1f8d3605283e3..f02e32fd1d312a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3130,7 +3130,6 @@ dummy_func( if (PyStackRef_IsTaggedInt(null_or_index)) { next = _PyForIter_NextWithIndex(iter_o, null_or_index); if (PyStackRef_IsNull(next)) { - null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); DISPATCH(); } @@ -3191,6 +3190,7 @@ dummy_func( JUMPBY(oparg + 1); DISPATCH(); } + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } else { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5be2671700aee7..e3cd3b71a1de08 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5759,9 +5759,7 @@ next = _PyForIter_NextWithIndex(iter_o, null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); if (PyStackRef_IsNull(next)) { - null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); - stack_pointer[-1] = null_or_index; DISPATCH(); } null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); @@ -7070,6 +7068,7 @@ JUMPBY(oparg + 1); DISPATCH(); } + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } else { @@ -7099,6 +7098,7 @@ DISPATCH(); } } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); From cc344e8dd0a6fdc83a032c229f9b3cf53f76a887 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 30 May 2025 17:25:07 +0300 Subject: [PATCH 419/989] gh-134718: Fix ast.dump() for empty non-default values (GH-134926) --- Lib/ast.py | 22 ++++++++--------- Lib/test/test_ast/test_ast.py | 24 +++++++++++++++++++ ...-05-30-13-07-29.gh-issue-134718.9Qvhxn.rst | 2 ++ 3 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-30-13-07-29.gh-issue-134718.9Qvhxn.rst diff --git a/Lib/ast.py b/Lib/ast.py index b9791bf52d3e08..2f11683ecf7c68 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -147,18 +147,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue - if ( - not show_empty - and (value is None or value == []) - # Special cases: - # `Constant(value=None)` and `MatchSingleton(value=None)` - and not isinstance(node, (Constant, MatchSingleton)) - ): - args_buffer.append(repr(value)) - continue - elif not keywords: - args.extend(args_buffer) - args_buffer = [] + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 46745cfa8f8325..59263012bc1440 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -1543,18 +1543,42 @@ def check_text(code, empty, full, **kwargs): full="MatchSingleton(value=None)", ) + check_node( + ast.MatchSingleton(value=[]), + empty="MatchSingleton(value=[])", + full="MatchSingleton(value=[])", + ) + check_node( ast.Constant(value=None), empty="Constant(value=None)", full="Constant(value=None)", ) + check_node( + ast.Constant(value=[]), + empty="Constant(value=[])", + full="Constant(value=[])", + ) + check_node( ast.Constant(value=''), empty="Constant(value='')", full="Constant(value='')", ) + check_node( + ast.Interpolation(value=ast.Constant(42), str=None, conversion=-1), + empty="Interpolation(value=Constant(value=42), str=None, conversion=-1)", + full="Interpolation(value=Constant(value=42), str=None, conversion=-1)", + ) + + check_node( + ast.Interpolation(value=ast.Constant(42), str=[], conversion=-1), + empty="Interpolation(value=Constant(value=42), str=[], conversion=-1)", + full="Interpolation(value=Constant(value=42), str=[], conversion=-1)", + ) + check_text( "def a(b: int = 0, *, c): ...", empty="Module(body=[FunctionDef(name='a', args=arguments(args=[arg(arg='b', annotation=Name(id='int', ctx=Load()))], kwonlyargs=[arg(arg='c')], kw_defaults=[None], defaults=[Constant(value=0)]), body=[Expr(value=Constant(value=Ellipsis))])])", diff --git a/Misc/NEWS.d/next/Library/2025-05-30-13-07-29.gh-issue-134718.9Qvhxn.rst b/Misc/NEWS.d/next/Library/2025-05-30-13-07-29.gh-issue-134718.9Qvhxn.rst new file mode 100644 index 00000000000000..922ab168fdd095 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-30-13-07-29.gh-issue-134718.9Qvhxn.rst @@ -0,0 +1,2 @@ +:func:`ast.dump` now only omits ``None`` and ``[]`` values if they are +default values. From eb145fabbdd755f803296beba2d235fc32efe592 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 30 May 2025 16:27:54 +0200 Subject: [PATCH 420/989] gh-134160: Improve multi-phase init note on isolation & subinterpreters (GH-134775) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/c-api/module.rst | 42 +++++++++++++++++++++--------- Doc/howto/isolating-extensions.rst | 4 ++- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index f7f4d37d4c721f..710135dca89eda 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -288,22 +288,40 @@ An alternate way to specify extensions is to request "multi-phase initialization Extension modules created this way behave more like Python modules: the initialization is split between the *creation phase*, when the module object is created, and the *execution phase*, when it is populated. -The distinction is similar to the :py:meth:`!__new__` and :py:meth:`!__init__` methods -of classes. +The distinction is similar to the :py:meth:`~object.__new__` and +:py:meth:`~object.__init__` methods of classes. Unlike modules created using single-phase initialization, these modules are not -singletons: if the *sys.modules* entry is removed and the module is re-imported, -a new module object is created, and the old module is subject to normal garbage -collection -- as with Python modules. -By default, multiple modules created from the same definition should be -independent: changes to one should not affect the others. -This means that all state should be specific to the module object (using e.g. -using :c:func:`PyModule_GetState`), or its contents (such as the module's -:attr:`~object.__dict__` or individual classes created with :c:func:`PyType_FromSpec`). +singletons. +For example, if the :py:attr:`sys.modules` entry is removed and the module +is re-imported, a new module object is created, and typically populated with +fresh method and type objects. +The old module is subject to normal garbage collection. +This mirrors the behavior of pure-Python modules. + +Additional module instances may be created in +:ref:`sub-interpreters ` +or after after Python runtime reinitialization +(:c:func:`Py_Finalize` and :c:func:`Py_Initialize`). +In these cases, sharing Python objects between module instances would likely +cause crashes or undefined behavior. + +To avoid such issues, each instance of an extension module should +be *isolated*: changes to one instance should not implicitly affect the others, +and all state, including references to Python objects, should be specific to +a particular module instance. +See :ref:`isolating-extensions-howto` for more details and a practical guide. + +A simpler way to avoid these issues is +:ref:`raising an error on repeated initialization `. All modules created using multi-phase initialization are expected to support -:ref:`sub-interpreters `. Making sure multiple modules -are independent is typically enough to achieve this. +:ref:`sub-interpreters `, or otherwise explicitly +signal a lack of support. +This is usually achieved by isolation or blocking repeated initialization, +as above. +A module may also be limited to the main interpreter using +the :c:data:`Py_mod_multiple_interpreters` slot. To request multi-phase initialization, the initialization function (PyInit_modulename) returns a :c:type:`PyModuleDef` instance with non-empty diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index 5513cd7367519f..b2109b1503992b 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -168,7 +168,7 @@ possible, consider explicit locking. If it is necessary to use process-global state, the simplest way to avoid issues with multiple interpreters is to explicitly prevent a module from being loaded more than once per process—see -`Opt-Out: Limiting to One Module Object per Process`_. +:ref:`isolating-extensions-optout`. Managing Per-Module State @@ -207,6 +207,8 @@ An example of a module with per-module state is currently available as example module initialization shown at the bottom of the file. +.. _isolating-extensions-optout: + Opt-Out: Limiting to One Module Object per Process -------------------------------------------------- From 52deabefd0af8fc6d9b40823323437bf210f50a5 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 30 May 2025 09:15:00 -0600 Subject: [PATCH 421/989] gh-132775: Expand the Capability of Interpreter.call() (gh-133484) It now supports most callables, full args, and return values. --- Include/internal/pycore_crossinterp.h | 35 +- Lib/test/_code_definitions.py | 20 + Lib/test/support/interpreters/__init__.py | 31 +- Lib/test/test_code.py | 48 +- Lib/test/test_interpreters/test_api.py | 541 ++++++++++++++++++---- Modules/_interpchannelsmodule.c | 8 +- Modules/_interpqueuesmodule.c | 8 +- Modules/_interpretersmodule.c | 414 +++++++++++++---- Python/crossinterp.c | 449 ++++++++++++++---- Python/import.c | 4 +- 10 files changed, 1257 insertions(+), 301 deletions(-) diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 1272927413868b..713ddc66ba7382 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -317,7 +317,9 @@ typedef enum error_code { _PyXI_ERR_ALREADY_RUNNING = -4, _PyXI_ERR_MAIN_NS_FAILURE = -5, _PyXI_ERR_APPLY_NS_FAILURE = -6, - _PyXI_ERR_NOT_SHAREABLE = -7, + _PyXI_ERR_PRESERVE_FAILURE = -7, + _PyXI_ERR_EXC_PROPAGATION_FAILURE = -8, + _PyXI_ERR_NOT_SHAREABLE = -9, } _PyXI_errcode; @@ -350,16 +352,33 @@ typedef struct xi_session _PyXI_session; PyAPI_FUNC(_PyXI_session *) _PyXI_NewSession(void); PyAPI_FUNC(void) _PyXI_FreeSession(_PyXI_session *); +typedef struct { + PyObject *preserved; + PyObject *excinfo; + _PyXI_errcode errcode; +} _PyXI_session_result; +PyAPI_FUNC(void) _PyXI_ClearResult(_PyXI_session_result *); + PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session *session, PyInterpreterState *interp, - PyObject *nsupdates); -PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session); - -PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace(_PyXI_session *); - -PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session); -PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session); + PyObject *nsupdates, + _PyXI_session_result *); +PyAPI_FUNC(int) _PyXI_Exit( + _PyXI_session *, + _PyXI_errcode, + _PyXI_session_result *); + +PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace( + _PyXI_session *, + _PyXI_errcode *); + +PyAPI_FUNC(int) _PyXI_Preserve( + _PyXI_session *, + const char *, + PyObject *, + _PyXI_errcode *); +PyAPI_FUNC(PyObject *) _PyXI_GetPreserved(_PyXI_session_result *, const char *); /*************/ diff --git a/Lib/test/_code_definitions.py b/Lib/test/_code_definitions.py index 733a15b25f6894..274beb65a6d0f4 100644 --- a/Lib/test/_code_definitions.py +++ b/Lib/test/_code_definitions.py @@ -57,6 +57,15 @@ def spam_with_globals_and_builtins(): print(res) +def spam_full_args(a, b, /, c, d, *args, e, f, **kwargs): + return (a, b, c, d, e, f, args, kwargs) + + +def spam_full_args_with_defaults(a=-1, b=-2, /, c=-3, d=-4, *args, + e=-5, f=-6, **kwargs): + return (a, b, c, d, e, f, args, kwargs) + + def spam_args_attrs_and_builtins(a, b, /, c, d, *args, e, f, **kwargs): if args.__len__() > 2: return None @@ -67,6 +76,10 @@ def spam_returns_arg(x): return x +def spam_raises(): + raise Exception('spam!') + + def spam_with_inner_not_closure(): def eggs(): pass @@ -177,8 +190,11 @@ def ham_C_closure(z): spam_minimal, spam_with_builtins, spam_with_globals_and_builtins, + spam_full_args, + spam_full_args_with_defaults, spam_args_attrs_and_builtins, spam_returns_arg, + spam_raises, spam_with_inner_not_closure, spam_with_inner_closure, spam_annotated, @@ -219,8 +235,10 @@ def ham_C_closure(z): spam, spam_minimal, spam_with_builtins, + spam_full_args, spam_args_attrs_and_builtins, spam_returns_arg, + spam_raises, spam_annotated, spam_with_inner_not_closure, spam_with_inner_closure, @@ -238,6 +256,7 @@ def ham_C_closure(z): STATELESS_CODE = [ *STATELESS_FUNCTIONS, script_with_globals, + spam_full_args_with_defaults, spam_with_globals_and_builtins, spam_full, ] @@ -248,6 +267,7 @@ def ham_C_closure(z): script_with_explicit_empty_return, spam_minimal, spam_with_builtins, + spam_raises, spam_with_inner_not_closure, spam_with_inner_closure, ] diff --git a/Lib/test/support/interpreters/__init__.py b/Lib/test/support/interpreters/__init__.py index e067f259364d2a..6d1b0690805d2d 100644 --- a/Lib/test/support/interpreters/__init__.py +++ b/Lib/test/support/interpreters/__init__.py @@ -226,33 +226,32 @@ def exec(self, code, /): if excinfo is not None: raise ExecutionFailed(excinfo) - def call(self, callable, /): - """Call the object in the interpreter with given args/kwargs. + def _call(self, callable, args, kwargs): + res, excinfo = _interpreters.call(self._id, callable, args, kwargs, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + return res - Only functions that take no arguments and have no closure - are supported. + def call(self, callable, /, *args, **kwargs): + """Call the object in the interpreter with given args/kwargs. - The return value is discarded. + Nearly all callables, args, kwargs, and return values are + supported. All "shareable" objects are supported, as are + "stateless" functions (meaning non-closures that do not use + any globals). This method will fall back to pickle. If the callable raises an exception then the error display - (including full traceback) is send back between the interpreters + (including full traceback) is sent back between the interpreters and an ExecutionFailed exception is raised, much like what happens with Interpreter.exec(). """ - # XXX Support args and kwargs. - # XXX Support arbitrary callables. - # XXX Support returning the return value (e.g. via pickle). - excinfo = _interpreters.call(self._id, callable, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) + return self._call(callable, args, kwargs) - def call_in_thread(self, callable, /): + def call_in_thread(self, callable, /, *args, **kwargs): """Return a new thread that calls the object in the interpreter. The return value and any raised exception are discarded. """ - def task(): - self.call(callable) - t = threading.Thread(target=task) + t = threading.Thread(target=self._call, args=(callable, args, kwargs)) t.start() return t diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 32cf8aacaf6b72..9fc2b047bef719 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -701,6 +701,26 @@ def test_local_kinds(self): 'checks': CO_FAST_LOCAL, 'res': CO_FAST_LOCAL, }, + defs.spam_full_args: { + 'a': POSONLY, + 'b': POSONLY, + 'c': POSORKW, + 'd': POSORKW, + 'e': KWONLY, + 'f': KWONLY, + 'args': VARARGS, + 'kwargs': VARKWARGS, + }, + defs.spam_full_args_with_defaults: { + 'a': POSONLY, + 'b': POSONLY, + 'c': POSORKW, + 'd': POSORKW, + 'e': KWONLY, + 'f': KWONLY, + 'args': VARARGS, + 'kwargs': VARKWARGS, + }, defs.spam_args_attrs_and_builtins: { 'a': POSONLY, 'b': POSONLY, @@ -714,6 +734,7 @@ def test_local_kinds(self): defs.spam_returns_arg: { 'x': POSORKW, }, + defs.spam_raises: {}, defs.spam_with_inner_not_closure: { 'eggs': CO_FAST_LOCAL, }, @@ -934,6 +955,20 @@ def new_var_counts(*, purelocals=5, globalvars=6, ), + defs.spam_full_args: new_var_counts( + posonly=2, + posorkw=2, + kwonly=2, + varargs=1, + varkwargs=1, + ), + defs.spam_full_args_with_defaults: new_var_counts( + posonly=2, + posorkw=2, + kwonly=2, + varargs=1, + varkwargs=1, + ), defs.spam_args_attrs_and_builtins: new_var_counts( posonly=2, posorkw=2, @@ -945,6 +980,9 @@ def new_var_counts(*, defs.spam_returns_arg: new_var_counts( posorkw=1, ), + defs.spam_raises: new_var_counts( + globalvars=1, + ), defs.spam_with_inner_not_closure: new_var_counts( purelocals=1, ), @@ -1097,10 +1135,16 @@ def new_var_counts(*, def test_stateless(self): self.maxDiff = None + STATELESS_FUNCTIONS = [ + *defs.STATELESS_FUNCTIONS, + # stateless with defaults + defs.spam_full_args_with_defaults, + ] + for func in defs.STATELESS_CODE: with self.subTest((func, '(code)')): _testinternalcapi.verify_stateless_code(func.__code__) - for func in defs.STATELESS_FUNCTIONS: + for func in STATELESS_FUNCTIONS: with self.subTest((func, '(func)')): _testinternalcapi.verify_stateless_code(func) @@ -1110,7 +1154,7 @@ def test_stateless(self): with self.assertRaises(Exception): _testinternalcapi.verify_stateless_code(func.__code__) - if func not in defs.STATELESS_FUNCTIONS: + if func not in STATELESS_FUNCTIONS: with self.subTest((func, '(func)')): with self.assertRaises(Exception): _testinternalcapi.verify_stateless_code(func) diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 165949167ceba8..b3c9ef8efba37a 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1,17 +1,22 @@ +import contextlib import os import pickle +import sys from textwrap import dedent import threading import types import unittest from test import support +from test.support import os_helper +from test.support import script_helper from test.support import import_helper # Raise SkipTest if subinterpreters not supported. _interpreters = import_helper.import_module('_interpreters') from test.support import Py_GIL_DISABLED from test.support import interpreters from test.support import force_not_colorized +import test._crossinterp_definitions as defs from test.support.interpreters import ( InterpreterError, InterpreterNotFoundError, ExecutionFailed, ) @@ -29,6 +34,59 @@ WHENCE_STR_STDLIB = '_interpreters module' +def is_pickleable(obj): + try: + pickle.dumps(obj) + except Exception: + return False + return True + + +@contextlib.contextmanager +def defined_in___main__(name, script, *, remove=False): + import __main__ as mainmod + mainns = vars(mainmod) + assert name not in mainns + exec(script, mainns, mainns) + if remove: + yield mainns.pop(name) + else: + try: + yield mainns[name] + finally: + mainns.pop(name, None) + + +def build_excinfo(exctype, msg=None, formatted=None, errdisplay=None): + if isinstance(exctype, type): + assert issubclass(exctype, BaseException), exctype + exctype = types.SimpleNamespace( + __name__=exctype.__name__, + __qualname__=exctype.__qualname__, + __module__=exctype.__module__, + ) + elif isinstance(exctype, str): + module, _, name = exctype.rpartition(exctype) + if not module and name in __builtins__: + module = 'builtins' + exctype = types.SimpleNamespace( + __name__=name, + __qualname__=exctype, + __module__=module or None, + ) + else: + assert isinstance(exctype, types.SimpleNamespace) + assert msg is None or isinstance(msg, str), msg + assert formatted is None or isinstance(formatted, str), formatted + assert errdisplay is None or isinstance(errdisplay, str), errdisplay + return types.SimpleNamespace( + type=exctype, + msg=msg, + formatted=formatted, + errdisplay=errdisplay, + ) + + class ModuleTests(TestBase): def test_queue_aliases(self): @@ -890,24 +948,26 @@ def test_created_with_capi(self): # Interpreter.exec() behavior. -def call_func_noop(): - pass +call_func_noop = defs.spam_minimal +call_func_ident = defs.spam_returns_arg +call_func_failure = defs.spam_raises def call_func_return_shareable(): return (1, None) -def call_func_return_not_shareable(): - return [1, 2, 3] +def call_func_return_stateless_func(): + return (lambda x: x) -def call_func_failure(): - raise Exception('spam!') +def call_func_return_pickleable(): + return [1, 2, 3] -def call_func_ident(value): - return value +def call_func_return_unpickleable(): + x = 42 + return (lambda: x) def get_call_func_closure(value): @@ -916,6 +976,11 @@ def call_func_closure(): return call_func_closure +def call_func_exec_wrapper(script, ns): + res = exec(script, ns, ns) + return res, ns, id(ns) + + class Spam: @staticmethod @@ -1012,86 +1077,375 @@ class TestInterpreterCall(TestBase): # - preserves info (e.g. SyntaxError) # - matching error display - def test_call(self): + @contextlib.contextmanager + def assert_fails(self, expected): + with self.assertRaises(ExecutionFailed) as cm: + yield cm + uncaught = cm.exception.excinfo + self.assertEqual(uncaught.type.__name__, expected.__name__) + + def assert_fails_not_shareable(self): + return self.assert_fails(interpreters.NotShareableError) + + def assert_code_equal(self, code1, code2): + if code1 == code2: + return + self.assertEqual(code1.co_name, code2.co_name) + self.assertEqual(code1.co_flags, code2.co_flags) + self.assertEqual(code1.co_consts, code2.co_consts) + self.assertEqual(code1.co_varnames, code2.co_varnames) + self.assertEqual(code1.co_cellvars, code2.co_cellvars) + self.assertEqual(code1.co_freevars, code2.co_freevars) + self.assertEqual(code1.co_names, code2.co_names) + self.assertEqual( + _testinternalcapi.get_code_var_counts(code1), + _testinternalcapi.get_code_var_counts(code2), + ) + self.assertEqual(code1.co_code, code2.co_code) + + def assert_funcs_equal(self, func1, func2): + if func1 == func2: + return + self.assertIs(type(func1), type(func2)) + self.assertEqual(func1.__name__, func2.__name__) + self.assertEqual(func1.__defaults__, func2.__defaults__) + self.assertEqual(func1.__kwdefaults__, func2.__kwdefaults__) + self.assertEqual(func1.__closure__, func2.__closure__) + self.assert_code_equal(func1.__code__, func2.__code__) + self.assertEqual( + _testinternalcapi.get_code_var_counts(func1), + _testinternalcapi.get_code_var_counts(func2), + ) + + def assert_exceptions_equal(self, exc1, exc2): + assert isinstance(exc1, Exception) + assert isinstance(exc2, Exception) + if exc1 == exc2: + return + self.assertIs(type(exc1), type(exc2)) + self.assertEqual(exc1.args, exc2.args) + + def test_stateless_funcs(self): interp = interpreters.create() - for i, (callable, args, kwargs) in enumerate([ - (call_func_noop, (), {}), - (Spam.noop, (), {}), + func = call_func_noop + with self.subTest('no args, no return'): + res = interp.call(func) + self.assertIsNone(res) + + func = call_func_return_shareable + with self.subTest('no args, returns shareable'): + res = interp.call(func) + self.assertEqual(res, (1, None)) + + func = call_func_return_stateless_func + expected = (lambda x: x) + with self.subTest('no args, returns stateless func'): + res = interp.call(func) + self.assert_funcs_equal(res, expected) + + func = call_func_return_pickleable + with self.subTest('no args, returns pickleable'): + res = interp.call(func) + self.assertEqual(res, [1, 2, 3]) + + func = call_func_return_unpickleable + with self.subTest('no args, returns unpickleable'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + + def test_stateless_func_returns_arg(self): + interp = interpreters.create() + + for arg in [ + None, + 10, + 'spam!', + b'spam!', + (1, 2, 'spam!'), + memoryview(b'spam!'), + ]: + with self.subTest(f'shareable {arg!r}'): + assert _interpreters.is_shareable(arg) + res = interp.call(defs.spam_returns_arg, arg) + self.assertEqual(res, arg) + + for arg in defs.STATELESS_FUNCTIONS: + with self.subTest(f'stateless func {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + self.assert_funcs_equal(res, arg) + + for arg in defs.TOP_FUNCTIONS: + if arg in defs.STATELESS_FUNCTIONS: + continue + with self.subTest(f'stateful func {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + self.assert_funcs_equal(res, arg) + assert is_pickleable(arg) + + for arg in [ + Ellipsis, + NotImplemented, + object(), + 2**1000, + [1, 2, 3], + {'a': 1, 'b': 2}, + types.SimpleNamespace(x=42), + # builtin types + object, + type, + Exception, + ModuleNotFoundError, + # builtin exceptions + Exception('uh-oh!'), + ModuleNotFoundError('mymodule'), + # builtin fnctions + len, + sys.exit, + # user classes + *defs.TOP_CLASSES, + *(c(*a) for c, a in defs.TOP_CLASSES.items() + if c not in defs.CLASSES_WITHOUT_EQUALITY), + ]: + with self.subTest(f'pickleable {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + if type(arg) is object: + self.assertIs(type(res), object) + elif isinstance(arg, BaseException): + self.assert_exceptions_equal(res, arg) + else: + self.assertEqual(res, arg) + assert is_pickleable(arg) + + for arg in [ + types.MappingProxyType({}), + *(f for f in defs.NESTED_FUNCTIONS + if f not in defs.STATELESS_FUNCTIONS), + ]: + with self.subTest(f'unpickleable {arg!r}'): + assert not _interpreters.is_shareable(arg) + assert not is_pickleable(arg) + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + def test_full_args(self): + interp = interpreters.create() + expected = (1, 2, 3, 4, 5, 6, ('?',), {'g': 7, 'h': 8}) + func = defs.spam_full_args + res = interp.call(func, 1, 2, 3, 4, '?', e=5, f=6, g=7, h=8) + self.assertEqual(res, expected) + + def test_full_defaults(self): + # pickleable, but not stateless + interp = interpreters.create() + expected = (-1, -2, -3, -4, -5, -6, (), {'g': 8, 'h': 9}) + res = interp.call(defs.spam_full_args_with_defaults, g=8, h=9) + self.assertEqual(res, expected) + + def test_modified_arg(self): + interp = interpreters.create() + script = dedent(""" + a = 7 + b = 2 + c = a ** b + """) + ns = {} + expected = {'a': 7, 'b': 2, 'c': 49} + res = interp.call(call_func_exec_wrapper, script, ns) + obj, resns, resid = res + del resns['__builtins__'] + self.assertIsNone(obj) + self.assertEqual(ns, {}) + self.assertEqual(resns, expected) + self.assertNotEqual(resid, id(ns)) + self.assertNotEqual(resid, id(resns)) + + def test_func_in___main___valid(self): + # pickleable, already there' + + with os_helper.temp_dir() as tempdir: + def new_mod(name, text): + script_helper.make_script(tempdir, name, dedent(text)) + + def run(text): + name = 'myscript' + text = dedent(f""" + import sys + sys.path.insert(0, {tempdir!r}) + + """) + dedent(text) + filename = script_helper.make_script(tempdir, name, text) + res = script_helper.assert_python_ok(filename) + return res.out.decode('utf-8').strip() + + # no module indirection + with self.subTest('no indirection'): + text = run(f""" + from test.support import interpreters + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + interp = interpreters.create() + res = interp.call(spam) + print(res) + """) + self.assertEqual(text, '') + + # indirect as func, direct interp + new_mod('mymod', f""" + def run(interp, func): + return interp.call(func) + """) + with self.subTest('indirect as func, direct interp'): + text = run(f""" + from test.support import interpreters + import mymod + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + interp = interpreters.create() + res = mymod.run(interp, spam) + print(res) + """) + self.assertEqual(text, '') + + # indirect as func, indirect interp + new_mod('mymod', f""" + from test.support import interpreters + def run(func): + interp = interpreters.create() + return interp.call(func) + """) + with self.subTest('indirect as func, indirect interp'): + text = run(f""" + import mymod + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + res = mymod.run(spam) + print(res) + """) + self.assertEqual(text, '') + + def test_func_in___main___invalid(self): + interp = interpreters.create() + + funcname = f'{__name__.replace(".", "_")}_spam_okay' + script = dedent(f""" + def {funcname}(): + # This a global var... + return __name__ + """) + + with self.subTest('pickleable, added dynamically'): + with defined_in___main__(funcname, script) as arg: + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + with self.subTest('lying about __main__'): + with defined_in___main__(funcname, script, remove=True) as arg: + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + def test_raises(self): + interp = interpreters.create() + with self.assertRaises(ExecutionFailed): + interp.call(call_func_failure) + + with self.assert_fails(ValueError): + interp.call(call_func_complex, '???', exc=ValueError('spam')) + + def test_call_valid(self): + interp = interpreters.create() + + for i, (callable, args, kwargs, expected) in enumerate([ + (call_func_noop, (), {}, None), + (call_func_ident, ('spamspamspam',), {}, 'spamspamspam'), + (call_func_return_shareable, (), {}, (1, None)), + (call_func_return_pickleable, (), {}, [1, 2, 3]), + (Spam.noop, (), {}, None), + (Spam.from_values, (), {}, Spam(())), + (Spam.from_values, (1, 2, 3), {}, Spam((1, 2, 3))), + (Spam, ('???',), {}, Spam('???')), + (Spam(101), (), {}, (101, (), {})), + (Spam(10101).run, (), {}, (10101, (), {})), + (call_func_complex, ('ident', 'spam'), {}, 'spam'), + (call_func_complex, ('full-ident', 'spam'), {}, ('spam', (), {})), + (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}, + ('spam', ('ham',), {'eggs': '!!!'})), + (call_func_complex, ('globals',), {}, __name__), + (call_func_complex, ('interpid',), {}, interp.id), + (call_func_complex, ('custom', 'spam!'), {}, Spam('spam!')), ]): with self.subTest(f'success case #{i+1}'): - res = interp.call(callable) - self.assertIs(res, None) + res = interp.call(callable, *args, **kwargs) + self.assertEqual(res, expected) + + def test_call_invalid(self): + interp = interpreters.create() + + func = get_call_func_closure + with self.subTest(func): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, 42) + + func = get_call_func_closure(42) + with self.subTest(func): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + + func = call_func_complex + op = 'closure' + with self.subTest(f'{func} ({op})'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, op, value='~~~') + + op = 'custom-inner' + with self.subTest(f'{func} ({op})'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, op, 'eggs!') + + def test_call_in_thread(self): + interp = interpreters.create() for i, (callable, args, kwargs) in enumerate([ - (call_func_ident, ('spamspamspam',), {}), - (get_call_func_closure, (42,), {}), - (get_call_func_closure(42), (), {}), + (call_func_noop, (), {}), + (call_func_return_shareable, (), {}), + (call_func_return_pickleable, (), {}), (Spam.from_values, (), {}), (Spam.from_values, (1, 2, 3), {}), - (Spam, ('???'), {}), (Spam(101), (), {}), (Spam(10101).run, (), {}), + (Spam.noop, (), {}), (call_func_complex, ('ident', 'spam'), {}), (call_func_complex, ('full-ident', 'spam'), {}), (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}), (call_func_complex, ('globals',), {}), (call_func_complex, ('interpid',), {}), - (call_func_complex, ('closure',), {'value': '~~~'}), (call_func_complex, ('custom', 'spam!'), {}), - (call_func_complex, ('custom-inner', 'eggs!'), {}), - (call_func_complex, ('???',), {'exc': ValueError('spam')}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), - ]): - with self.subTest(f'invalid case #{i+1}'): - with self.assertRaises(Exception): - if args or kwargs: - raise Exception((args, kwargs)) - interp.call(callable) - - with self.assertRaises(ExecutionFailed): - interp.call(call_func_failure) - - def test_call_in_thread(self): - interp = interpreters.create() - - for i, (callable, args, kwargs) in enumerate([ - (call_func_noop, (), {}), - (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): with self.captured_thread_exception() as ctx: - t = interp.call_in_thread(callable) + t = interp.call_in_thread(callable, *args, **kwargs) t.join() self.assertIsNone(ctx.caught) for i, (callable, args, kwargs) in enumerate([ - (call_func_ident, ('spamspamspam',), {}), (get_call_func_closure, (42,), {}), (get_call_func_closure(42), (), {}), - (Spam.from_values, (), {}), - (Spam.from_values, (1, 2, 3), {}), - (Spam, ('???'), {}), - (Spam(101), (), {}), - (Spam(10101).run, (), {}), - (call_func_complex, ('ident', 'spam'), {}), - (call_func_complex, ('full-ident', 'spam'), {}), - (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}), - (call_func_complex, ('globals',), {}), - (call_func_complex, ('interpid',), {}), - (call_func_complex, ('closure',), {'value': '~~~'}), - (call_func_complex, ('custom', 'spam!'), {}), - (call_func_complex, ('custom-inner', 'eggs!'), {}), - (call_func_complex, ('???',), {'exc': ValueError('spam')}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), ]): with self.subTest(f'invalid case #{i+1}'): - if args or kwargs: - continue with self.captured_thread_exception() as ctx: - t = interp.call_in_thread(callable) + t = interp.call_in_thread(callable, *args, **kwargs) t.join() self.assertIsNotNone(ctx.caught) @@ -1600,18 +1954,14 @@ def test_exec(self): with results: exc = _interpreters.exec(interpid, script) out = results.stdout() - self.assertEqual(out, '') - self.assert_ns_equal(exc, types.SimpleNamespace( - type=types.SimpleNamespace( - __name__='Exception', - __qualname__='Exception', - __module__='builtins', - ), - msg='uh-oh!', + expected = build_excinfo( + Exception, 'uh-oh!', # We check these in other tests. formatted=exc.formatted, errdisplay=exc.errdisplay, - )) + ) + self.assertEqual(out, '') + self.assert_ns_equal(exc, expected) with self.subTest('from C-API'): with self.interpreter_from_capi() as interpid: @@ -1623,25 +1973,50 @@ def test_exec(self): self.assertEqual(exc.msg, 'it worked!') def test_call(self): - with self.subTest('no args'): - interpid = _interpreters.create() - with self.assertRaises(ValueError): - _interpreters.call(interpid, call_func_return_shareable) + interpid = _interpreters.create() + + # Here we focus on basic args and return values. + # See TestInterpreterCall for full operational coverage, + # including supported callables. + + with self.subTest('no args, return None'): + func = defs.spam_minimal + res, exc = _interpreters.call(interpid, func) + self.assertIsNone(exc) + self.assertIsNone(res) + + with self.subTest('empty args, return None'): + func = defs.spam_minimal + res, exc = _interpreters.call(interpid, func, (), {}) + self.assertIsNone(exc) + self.assertIsNone(res) + + with self.subTest('no args, return non-None'): + func = defs.script_with_return + res, exc = _interpreters.call(interpid, func) + self.assertIsNone(exc) + self.assertIs(res, True) + + with self.subTest('full args, return non-None'): + expected = (1, 2, 3, 4, 5, 6, (7, 8), {'g': 9, 'h': 0}) + func = defs.spam_full_args + args = (1, 2, 3, 4, 7, 8) + kwargs = dict(e=5, f=6, g=9, h=0) + res, exc = _interpreters.call(interpid, func, args, kwargs) + self.assertIsNone(exc) + self.assertEqual(res, expected) with self.subTest('uncaught exception'): - interpid = _interpreters.create() - exc = _interpreters.call(interpid, call_func_failure) - self.assertEqual(exc, types.SimpleNamespace( - type=types.SimpleNamespace( - __name__='Exception', - __qualname__='Exception', - __module__='builtins', - ), - msg='spam!', + func = defs.spam_raises + res, exc = _interpreters.call(interpid, func) + expected = build_excinfo( + Exception, 'spam!', # We check these in other tests. formatted=exc.formatted, errdisplay=exc.errdisplay, - )) + ) + self.assertIsNone(res) + self.assertEqual(exc, expected) @requires_test_modules def test_set___main___attrs(self): diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index bfd805bf5e4072..ea2e5f99dfa308 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -254,10 +254,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index ffc52c8ee74d85..71d8fd8716cd94 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1356,10 +1356,10 @@ _queueobj_from_xid(_PyXIData_t *data) PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } PyTypeObject *cls = get_external_queue_type(mod); diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 376517ab92360f..037e9544543c4d 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -72,6 +72,32 @@ is_running_main(PyInterpreterState *interp) } +static inline int +is_notshareable_raised(PyThreadState *tstate) +{ + PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); + return _PyErr_ExceptionMatches(tstate, exctype); +} + +static void +unwrap_not_shareable(PyThreadState *tstate) +{ + if (!is_notshareable_raised(tstate)) { + return; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; + } + else { + assert(PyException_GetContext(exc) == NULL); + } + _PyErr_SetRaisedException(tstate, exc); +} + + /* Cross-interpreter Buffer Views *******************************************/ /* When a memoryview object is "shared" between interpreters, @@ -320,10 +346,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); @@ -422,76 +448,265 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) } +struct interp_call { + _PyXIData_t *func; + _PyXIData_t *args; + _PyXIData_t *kwargs; + struct { + _PyXIData_t func; + _PyXIData_t args; + _PyXIData_t kwargs; + } _preallocated; +}; + +static void +_interp_call_clear(struct interp_call *call) +{ + if (call->func != NULL) { + _PyXIData_Clear(NULL, call->func); + } + if (call->args != NULL) { + _PyXIData_Clear(NULL, call->args); + } + if (call->kwargs != NULL) { + _PyXIData_Clear(NULL, call->kwargs); + } + *call = (struct interp_call){0}; +} + +static int +_interp_call_pack(PyThreadState *tstate, struct interp_call *call, + PyObject *func, PyObject *args, PyObject *kwargs) +{ + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + assert(call->func == NULL); + assert(call->args == NULL); + assert(call->kwargs == NULL); + // Handle the func. + if (!PyCallable_Check(func)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a callable, got %R", func); + return -1; + } + if (_PyFunction_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (_PyPickle_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + _PyErr_SetRaisedException(tstate, exc); + return -1; + } + Py_DECREF(exc); + } + call->func = &call->_preallocated.func; + // Handle the args. + if (args == NULL || args == Py_None) { + // Leave it empty. + } + else { + assert(PyTuple_Check(args)); + if (PyTuple_GET_SIZE(args) > 0) { + if (_PyObject_GetXIData( + tstate, args, fallback, &call->_preallocated.args) < 0) + { + _interp_call_clear(call); + return -1; + } + call->args = &call->_preallocated.args; + } + } + // Handle the kwargs. + if (kwargs == NULL || kwargs == Py_None) { + // Leave it empty. + } + else { + assert(PyDict_Check(kwargs)); + if (PyDict_GET_SIZE(kwargs) > 0) { + if (_PyObject_GetXIData( + tstate, kwargs, fallback, &call->_preallocated.kwargs) < 0) + { + _interp_call_clear(call); + return -1; + } + call->kwargs = &call->_preallocated.kwargs; + } + } + return 0; +} + +static int +_interp_call_unpack(struct interp_call *call, + PyObject **p_func, PyObject **p_args, PyObject **p_kwargs) +{ + // Unpack the func. + PyObject *func = _PyXIData_NewObject(call->func); + if (func == NULL) { + return -1; + } + // Unpack the args. + PyObject *args; + if (call->args == NULL) { + args = PyTuple_New(0); + if (args == NULL) { + Py_DECREF(func); + return -1; + } + } + else { + args = _PyXIData_NewObject(call->args); + if (args == NULL) { + Py_DECREF(func); + return -1; + } + assert(PyTuple_Check(args)); + } + // Unpack the kwargs. + PyObject *kwargs = NULL; + if (call->kwargs != NULL) { + kwargs = _PyXIData_NewObject(call->kwargs); + if (kwargs == NULL) { + Py_DECREF(func); + Py_DECREF(args); + return -1; + } + assert(PyDict_Check(kwargs)); + } + *p_func = func; + *p_args = args; + *p_kwargs = kwargs; + return 0; +} + static int -_run_script(_PyXIData_t *script, PyObject *ns) +_make_call(struct interp_call *call, + PyObject **p_result, _PyXI_errcode *p_errcode) +{ + assert(call != NULL && call->func != NULL); + PyThreadState *tstate = _PyThreadState_GET(); + + // Get the func and args. + PyObject *func = NULL, *args = NULL, *kwargs = NULL; + if (_interp_call_unpack(call, &func, &args, &kwargs) < 0) { + assert(func == NULL); + assert(args == NULL); + assert(kwargs == NULL); + *p_errcode = is_notshareable_raised(tstate) + ? _PyXI_ERR_NOT_SHAREABLE + : _PyXI_ERR_OTHER; + return -1; + } + *p_errcode = _PyXI_ERR_NO_ERROR; + + // Make the call. + PyObject *resobj = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_XDECREF(args); + Py_XDECREF(kwargs); + if (resobj == NULL) { + return -1; + } + *p_result = resobj; + return 0; +} + +static int +_run_script(_PyXIData_t *script, PyObject *ns, _PyXI_errcode *p_errcode) { PyObject *code = _PyXIData_NewObject(script); if (code == NULL) { + *p_errcode = _PyXI_ERR_NOT_SHAREABLE; return -1; } PyObject *result = PyEval_EvalCode(code, ns, ns); Py_DECREF(code); if (result == NULL) { + *p_errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; return -1; } + assert(result == Py_None); Py_DECREF(result); // We throw away the result. return 0; } +struct run_result { + PyObject *result; + PyObject *excinfo; +}; + +static void +_run_result_clear(struct run_result *runres) +{ + Py_CLEAR(runres->result); + Py_CLEAR(runres->excinfo); +} + static int -_exec_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, - _PyXIData_t *script, PyObject *shareables, - PyObject **p_excinfo) +_run_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, + _PyXIData_t *script, struct interp_call *call, + PyObject *shareables, struct run_result *runres) { assert(!_PyErr_Occurred(tstate)); _PyXI_session *session = _PyXI_NewSession(); if (session == NULL) { return -1; } + _PyXI_session_result result = {0}; // Prep and switch interpreters. - if (_PyXI_Enter(session, interp, shareables) < 0) { - if (_PyErr_Occurred(tstate)) { - // If an error occured at this step, it means that interp - // was not prepared and switched. - _PyXI_FreeSession(session); - return -1; - } - // Now, apply the error from another interpreter: - PyObject *excinfo = _PyXI_ApplyCapturedException(session); - if (excinfo != NULL) { - *p_excinfo = excinfo; - } - assert(PyErr_Occurred()); + if (_PyXI_Enter(session, interp, shareables, &result) < 0) { + // If an error occured at this step, it means that interp + // was not prepared and switched. _PyXI_FreeSession(session); + assert(result.excinfo == NULL); return -1; } - // Run the script. + // Run in the interpreter. int res = -1; - PyObject *mainns = _PyXI_GetMainNamespace(session); - if (mainns == NULL) { - goto finally; + _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; + if (script != NULL) { + assert(call == NULL); + PyObject *mainns = _PyXI_GetMainNamespace(session, &errcode); + if (mainns == NULL) { + goto finally; + } + res = _run_script(script, mainns, &errcode); } - res = _run_script(script, mainns); + else { + assert(call != NULL); + PyObject *resobj; + res = _make_call(call, &resobj, &errcode); + if (res == 0) { + res = _PyXI_Preserve(session, "resobj", resobj, &errcode); + Py_DECREF(resobj); + if (res < 0) { + goto finally; + } + } + } + int exitres; finally: // Clean up and switch back. - _PyXI_Exit(session); + exitres = _PyXI_Exit(session, errcode, &result); + assert(res == 0 || exitres != 0); + _PyXI_FreeSession(session); - // Propagate any exception out to the caller. - assert(!PyErr_Occurred()); - if (res < 0) { - PyObject *excinfo = _PyXI_ApplyCapturedException(session); - if (excinfo != NULL) { - *p_excinfo = excinfo; - } + res = exitres; + if (_PyErr_Occurred(tstate)) { + assert(res < 0); + } + else if (res < 0) { + assert(result.excinfo != NULL); + runres->excinfo = Py_NewRef(result.excinfo); + res = -1; } else { - assert(!_PyXI_HasCapturedException(session)); + assert(result.excinfo == NULL); + runres->result = _PyXI_GetPreserved(&result, "resobj"); + if (_PyErr_Occurred(tstate)) { + res = -1; + } } - - _PyXI_FreeSession(session); + _PyXI_ClearResult(&result); return res; } @@ -842,21 +1057,23 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } // Prep and switch interpreters, including apply the updates. - if (_PyXI_Enter(session, interp, updates) < 0) { - if (!PyErr_Occurred()) { - _PyXI_ApplyCapturedException(session); - assert(PyErr_Occurred()); - } - else { - assert(!_PyXI_HasCapturedException(session)); - } + if (_PyXI_Enter(session, interp, updates, NULL) < 0) { _PyXI_FreeSession(session); return NULL; } // Clean up and switch back. - _PyXI_Exit(session); + assert(!PyErr_Occurred()); + int res = _PyXI_Exit(session, _PyXI_ERR_NO_ERROR, NULL); _PyXI_FreeSession(session); + assert(res == 0); + if (res < 0) { + // unreachable + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "unresolved error"); + } + return NULL; + } Py_RETURN_NONE; } @@ -867,23 +1084,16 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static void -unwrap_not_shareable(PyThreadState *tstate) +static PyObject * +_handle_script_error(struct run_result *runres) { - PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); - if (!_PyErr_ExceptionMatches(tstate, exctype)) { - return; - } - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyObject *cause = PyException_GetCause(exc); - if (cause != NULL) { - Py_DECREF(exc); - exc = cause; - } - else { - assert(PyException_GetContext(exc) == NULL); + assert(runres->result == NULL); + if (runres->excinfo == NULL) { + assert(PyErr_Occurred()); + return NULL; } - _PyErr_SetRaisedException(tstate, exc); + assert(!PyErr_Occurred()); + return runres->excinfo; } static PyObject * @@ -918,13 +1128,14 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; #undef FUNCNAME } @@ -981,13 +1192,14 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; #undef FUNCNAME } @@ -1043,13 +1255,14 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, shared, &excinfo); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; #undef FUNCNAME } @@ -1069,15 +1282,18 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) #define FUNCNAME MODULE_NAME_STR ".call" PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "callable", "args", "kwargs", - "restrict", NULL}; + "preserve_exc", "restrict", NULL}; PyObject *id, *callable; PyObject *args_obj = NULL; PyObject *kwargs_obj = NULL; + int preserve_exc = 0; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|OO$p:" FUNCNAME, kwlist, - &id, &callable, &args_obj, &kwargs_obj, - &restricted)) + "OO|O!O!$pp:" FUNCNAME, kwlist, + &id, &callable, + &PyTuple_Type, &args_obj, + &PyDict_Type, &kwargs_obj, + &preserve_exc, &restricted)) { return NULL; } @@ -1089,29 +1305,29 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (args_obj != NULL) { - _PyErr_SetString(tstate, PyExc_ValueError, "got unexpected args"); - return NULL; - } - if (kwargs_obj != NULL) { - _PyErr_SetString(tstate, PyExc_ValueError, "got unexpected kwargs"); + struct interp_call call = {0}; + if (_interp_call_pack(tstate, &call, callable, args_obj, kwargs_obj) < 0) { return NULL; } - _PyXIData_t xidata = {0}; - if (_PyCode_GetPureScriptXIData(tstate, callable, &xidata) < 0) { - unwrap_not_shareable(tstate); - return NULL; + PyObject *res_and_exc = NULL; + struct run_result runres = {0}; + if (_run_in_interpreter(tstate, interp, NULL, &call, NULL, &runres) < 0) { + if (runres.excinfo == NULL) { + assert(_PyErr_Occurred(tstate)); + goto finally; + } + assert(!_PyErr_Occurred(tstate)); } + assert(runres.result == NULL || runres.excinfo == NULL); + res_and_exc = Py_BuildValue("OO", + (runres.result ? runres.result : Py_None), + (runres.excinfo ? runres.excinfo : Py_None)); - PyObject *excinfo = NULL; - int res = _exec_in_interpreter(tstate, interp, &xidata, NULL, &excinfo); - _PyXIData_Release(&xidata); - if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; - } - Py_RETURN_NONE; +finally: + _interp_call_clear(&call); + _run_result_clear(&runres); + return res_and_exc; #undef FUNCNAME } @@ -1119,13 +1335,7 @@ PyDoc_STRVAR(call_doc, "call(id, callable, args=None, kwargs=None, *, restrict=False)\n\ \n\ Call the provided object in the identified interpreter.\n\ -Pass the given args and kwargs, if possible.\n\ -\n\ -\"callable\" may be a plain function with no free vars that takes\n\ -no arguments.\n\ -\n\ -The function's code object is used and all its state\n\ -is ignored, including its __globals__ dict."); +Pass the given args and kwargs, if possible."); static PyObject * diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 13d91c508c41fa..5e73ab28f2b663 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -70,6 +70,17 @@ runpy_run_path(const char *filename, const char *modname) } +static void +set_exc_with_cause(PyObject *exctype, const char *msg) +{ + PyObject *cause = PyErr_GetRaisedException(); + PyErr_SetString(exctype, msg); + PyObject *exc = PyErr_GetRaisedException(); + PyException_SetCause(exc, cause); + PyErr_SetRaisedException(exc); +} + + static PyObject * pyerr_get_message(PyObject *exc) { @@ -1314,7 +1325,7 @@ _excinfo_normalize_type(struct _excinfo_type *info, } static void -_PyXI_excinfo_Clear(_PyXI_excinfo *info) +_PyXI_excinfo_clear(_PyXI_excinfo *info) { _excinfo_clear_type(&info->type); if (info->msg != NULL) { @@ -1364,7 +1375,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) assert(exc != NULL); if (PyErr_GivenExceptionMatches(exc, PyExc_MemoryError)) { - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return NULL; } const char *failure = NULL; @@ -1410,7 +1421,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1461,7 +1472,7 @@ _PyXI_excinfo_InitFromObject(_PyXI_excinfo *info, PyObject *obj) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1656,7 +1667,7 @@ _PyXI_ExcInfoAsObject(_PyXI_excinfo *info) void _PyXI_ClearExcInfo(_PyXI_excinfo *info) { - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); } @@ -1694,6 +1705,14 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyErr_SetString(PyExc_InterpreterError, "failed to apply namespace to __main__"); break; + case _PyXI_ERR_PRESERVE_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to preserve objects across session"); + break; + case _PyXI_ERR_EXC_PROPAGATION_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to transfer exception between interpreters"); + break; case _PyXI_ERR_NOT_SHAREABLE: _set_xid_lookup_failure(tstate, NULL, NULL, NULL); break; @@ -1743,7 +1762,7 @@ _PyXI_InitError(_PyXI_error *error, PyObject *excobj, _PyXI_errcode code) assert(excobj == NULL); assert(code != _PyXI_ERR_NO_ERROR); error->code = code; - _PyXI_excinfo_Clear(&error->uncaught); + _PyXI_excinfo_clear(&error->uncaught); } return failure; } @@ -1753,7 +1772,7 @@ _PyXI_ApplyError(_PyXI_error *error) { PyThreadState *tstate = PyThreadState_Get(); if (error->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // Raise an exception that proxies the propagated exception. + // We will raise an exception that proxies the propagated exception. return _PyXI_excinfo_AsObject(&error->uncaught); } else if (error->code == _PyXI_ERR_NOT_SHAREABLE) { @@ -1839,7 +1858,8 @@ _sharednsitem_has_value(_PyXI_namespace_item *item, int64_t *p_interpid) } static int -_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) +_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value, + xidata_fallback_t fallback) { assert(_sharednsitem_is_initialized(item)); assert(item->xidata == NULL); @@ -1848,8 +1868,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) return -1; } PyThreadState *tstate = PyThreadState_Get(); - // XXX Use _PyObject_GetXIDataWithFallback()? - if (_PyObject_GetXIDataNoFallback(tstate, value, item->xidata) != 0) { + if (_PyObject_GetXIData(tstate, value, fallback, item->xidata) < 0) { PyMem_RawFree(item->xidata); item->xidata = NULL; // The caller may want to propagate PyExc_NotShareableError @@ -1881,7 +1900,8 @@ _sharednsitem_clear(_PyXI_namespace_item *item) } static int -_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) +_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns, + xidata_fallback_t fallback) { assert(item->name != NULL); assert(item->xidata == NULL); @@ -1893,7 +1913,7 @@ _sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) // When applied, this item will be set to the default (or fail). return 0; } - if (_sharednsitem_set_value(item, value) < 0) { + if (_sharednsitem_set_value(item, value, fallback) < 0) { return -1; } return 0; @@ -2144,18 +2164,21 @@ _create_sharedns(PyObject *names) return NULL; } -static void _propagate_not_shareable_error(_PyXI_session *); +static void _propagate_not_shareable_error(_PyXI_errcode *); static int -_fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, _PyXI_session *session) +_fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, + xidata_fallback_t fallback, _PyXI_errcode *p_errcode) { // All items are expected to be shareable. assert(_sharedns_check_counts(ns)); assert(ns->numnames == ns->maxitems); assert(ns->numvalues == 0); for (Py_ssize_t i=0; i < ns->maxitems; i++) { - if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj) < 0) { - _propagate_not_shareable_error(session); + if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj, fallback) < 0) { + if (p_errcode != NULL) { + _propagate_not_shareable_error(p_errcode); + } // Clear out the ones we set so far. for (Py_ssize_t j=0; j < i; j++) { _sharednsitem_clear_value(&ns->items[j]); @@ -2221,6 +2244,18 @@ _apply_sharedns(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) /* switched-interpreter sessions */ /*********************************/ +struct xi_session_error { + // This is set if the interpreter is entered and raised an exception + // that needs to be handled in some special way during exit. + _PyXI_errcode *override; + // This is set if exit captured an exception to propagate. + _PyXI_error *info; + + // -- pre-allocated memory -- + _PyXI_error _info; + _PyXI_errcode _override; +}; + struct xi_session { #define SESSION_UNUSED 0 #define SESSION_ACTIVE 1 @@ -2249,18 +2284,14 @@ struct xi_session { // beginning of the session as a convenience. PyObject *main_ns; - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *error_override; - // This is set if exit captured an exception to propagate. - _PyXI_error *error; + // This is a dict of objects that will be available (via sharing) + // once the session exits. Do not access this directly; use + // _PyXI_Preserve() and _PyXI_GetPreserved() instead; + PyObject *_preserved; - // -- pre-allocated memory -- - _PyXI_error _error; - _PyXI_errcode _error_override; + struct xi_session_error error; }; - _PyXI_session * _PyXI_NewSession(void) { @@ -2286,9 +2317,25 @@ _session_is_active(_PyXI_session *session) return session->status == SESSION_ACTIVE; } -static int _ensure_main_ns(_PyXI_session *); +static int +_session_pop_error(_PyXI_session *session, struct xi_session_error *err) +{ + if (session->error.info == NULL) { + assert(session->error.override == NULL); + *err = (struct xi_session_error){0}; + return 0; + } + *err = session->error; + err->info = &err->_info; + if (err->override != NULL) { + err->override = &err->_override; + } + session->error = (struct xi_session_error){0}; + return 1; +} + +static int _ensure_main_ns(_PyXI_session *, _PyXI_errcode *); static inline void _session_set_error(_PyXI_session *, _PyXI_errcode); -static void _capture_current_exception(_PyXI_session *); /* enter/exit a cross-interpreter session */ @@ -2305,9 +2352,9 @@ _enter_session(_PyXI_session *session, PyInterpreterState *interp) assert(!session->running); assert(session->main_ns == NULL); // Set elsewhere and cleared in _capture_current_exception(). - assert(session->error_override == NULL); - // Set elsewhere and cleared in _PyXI_ApplyCapturedException(). - assert(session->error == NULL); + assert(session->error.override == NULL); + // Set elsewhere and cleared in _PyXI_Exit(). + assert(session->error.info == NULL); // Switch to interpreter. PyThreadState *tstate = PyThreadState_Get(); @@ -2336,14 +2383,16 @@ _exit_session(_PyXI_session *session) PyThreadState *tstate = session->init_tstate; assert(tstate != NULL); assert(PyThreadState_Get() == tstate); + assert(!_PyErr_Occurred(tstate)); // Release any of the entered interpreters resources. Py_CLEAR(session->main_ns); + Py_CLEAR(session->_preserved); // Ensure this thread no longer owns __main__. if (session->running) { _PyInterpreterState_SetNotRunningMain(tstate->interp); - assert(!PyErr_Occurred()); + assert(!_PyErr_Occurred(tstate)); session->running = 0; } @@ -2360,21 +2409,16 @@ _exit_session(_PyXI_session *session) assert(!session->own_init_tstate); } - // For now the error data persists past the exit. - *session = (_PyXI_session){ - .error_override = session->error_override, - .error = session->error, - ._error = session->_error, - ._error_override = session->_error_override, - }; + assert(session->error.info == NULL); + assert(session->error.override == _PyXI_ERR_NO_ERROR); + + *session = (_PyXI_session){0}; } static void -_propagate_not_shareable_error(_PyXI_session *session) +_propagate_not_shareable_error(_PyXI_errcode *p_errcode) { - if (session == NULL) { - return; - } + assert(p_errcode != NULL); PyThreadState *tstate = PyThreadState_Get(); PyObject *exctype = get_notshareableerror_type(tstate); if (exctype == NULL) { @@ -2384,46 +2428,46 @@ _propagate_not_shareable_error(_PyXI_session *session) } if (PyErr_ExceptionMatches(exctype)) { // We want to propagate the exception directly. - _session_set_error(session, _PyXI_ERR_NOT_SHAREABLE); + *p_errcode = _PyXI_ERR_NOT_SHAREABLE; } } -PyObject * -_PyXI_ApplyCapturedException(_PyXI_session *session) -{ - assert(!PyErr_Occurred()); - assert(session->error != NULL); - PyObject *res = _PyXI_ApplyError(session->error); - assert((res == NULL) != (PyErr_Occurred() == NULL)); - session->error = NULL; - return res; -} - -int -_PyXI_HasCapturedException(_PyXI_session *session) -{ - return session->error != NULL; -} - int _PyXI_Enter(_PyXI_session *session, - PyInterpreterState *interp, PyObject *nsupdates) + PyInterpreterState *interp, PyObject *nsupdates, + _PyXI_session_result *result) { // Convert the attrs for cross-interpreter use. _PyXI_namespace *sharedns = NULL; if (nsupdates != NULL) { Py_ssize_t len = PyDict_Size(nsupdates); if (len < 0) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } return -1; } if (len > 0) { sharedns = _create_sharedns(nsupdates); if (sharedns == NULL) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } return -1; } - if (_fill_sharedns(sharedns, nsupdates, NULL) < 0) { - assert(session->error == NULL); + // For now we limit it to shareable objects. + xidata_fallback_t fallback = _PyXIDATA_XIDATA_ONLY; + _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; + if (_fill_sharedns(sharedns, nsupdates, fallback, &errcode) < 0) { + assert(PyErr_Occurred()); + assert(session->error.info == NULL); + if (errcode == _PyXI_ERR_NO_ERROR) { + errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } _destroy_sharedns(sharedns); + if (result != NULL) { + result->errcode = errcode; + } return -1; } } @@ -2445,8 +2489,7 @@ _PyXI_Enter(_PyXI_session *session, // Apply the cross-interpreter data. if (sharedns != NULL) { - if (_ensure_main_ns(session) < 0) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; + if (_ensure_main_ns(session, &errcode) < 0) { goto error; } if (_apply_sharedns(sharedns, session->main_ns, NULL) < 0) { @@ -2462,19 +2505,124 @@ _PyXI_Enter(_PyXI_session *session, error: // We want to propagate all exceptions here directly (best effort). + assert(errcode != _PyXI_ERR_NO_ERROR); _session_set_error(session, errcode); + assert(!PyErr_Occurred()); + + // Exit the session. + struct xi_session_error err; + (void)_session_pop_error(session, &err); _exit_session(session); + if (sharedns != NULL) { _destroy_sharedns(sharedns); } + + // Apply the error from the other interpreter. + PyObject *excinfo = _PyXI_ApplyError(err.info); + _PyXI_excinfo_clear(&err.info->uncaught); + if (excinfo != NULL) { + if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Enter(): uncaught exception discarded"); +#endif + } + } + assert(PyErr_Occurred()); + return -1; } -void -_PyXI_Exit(_PyXI_session *session) +static int _pop_preserved(_PyXI_session *, _PyXI_namespace **, PyObject **, + _PyXI_errcode *); +static int _finish_preserved(_PyXI_namespace *, PyObject **); + +int +_PyXI_Exit(_PyXI_session *session, _PyXI_errcode errcode, + _PyXI_session_result *result) { - _capture_current_exception(session); + int res = 0; + + // Capture the raised exception, if any. + assert(session->error.info == NULL); + if (PyErr_Occurred()) { + _session_set_error(session, errcode); + assert(!PyErr_Occurred()); + } + else { + assert(errcode == _PyXI_ERR_NO_ERROR); + assert(session->error.override == NULL); + } + + // Capture the preserved namespace. + _PyXI_namespace *preserved = NULL; + PyObject *preservedobj = NULL; + if (result != NULL) { + errcode = _PyXI_ERR_NO_ERROR; + if (_pop_preserved(session, &preserved, &preservedobj, &errcode) < 0) { + if (session->error.info != NULL) { + // XXX Chain the exception (i.e. set __context__)? + PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + } + else { + _session_set_error(session, errcode); + } + } + } + + // Exit the session. + struct xi_session_error err; + (void)_session_pop_error(session, &err); _exit_session(session); + + // Restore the preserved namespace. + assert(preserved == NULL || preservedobj == NULL); + if (_finish_preserved(preserved, &preservedobj) < 0) { + assert(preservedobj == NULL); + if (err.info != NULL) { + // XXX Chain the exception (i.e. set __context__)? + PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + } + else { + errcode = _PyXI_ERR_PRESERVE_FAILURE; + _propagate_not_shareable_error(&errcode); + } + } + if (result != NULL) { + result->preserved = preservedobj; + result->errcode = errcode; + } + + // Apply the error from the other interpreter, if any. + if (err.info != NULL) { + res = -1; + assert(!PyErr_Occurred()); + PyObject *excinfo = _PyXI_ApplyError(err.info); + _PyXI_excinfo_clear(&err.info->uncaught); + if (excinfo == NULL) { + assert(PyErr_Occurred()); + if (result != NULL) { + _PyXI_ClearResult(result); + *result = (_PyXI_session_result){ + .errcode = _PyXI_ERR_EXC_PROPAGATION_FAILURE, + }; + } + } + else if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Exit(): uncaught exception discarded"); +#endif + } + } + return res; } @@ -2483,15 +2631,15 @@ _PyXI_Exit(_PyXI_session *session) static void _capture_current_exception(_PyXI_session *session) { - assert(session->error == NULL); + assert(session->error.info == NULL); if (!PyErr_Occurred()) { - assert(session->error_override == NULL); + assert(session->error.override == NULL); return; } // Handle the exception override. - _PyXI_errcode *override = session->error_override; - session->error_override = NULL; + _PyXI_errcode *override = session->error.override; + session->error.override = NULL; _PyXI_errcode errcode = override != NULL ? *override : _PyXI_ERR_UNCAUGHT_EXCEPTION; @@ -2514,7 +2662,7 @@ _capture_current_exception(_PyXI_session *session) } // Capture the exception. - _PyXI_error *err = &session->_error; + _PyXI_error *err = &session->error._info; *err = (_PyXI_error){ .interp = session->init_tstate->interp, }; @@ -2541,7 +2689,7 @@ _capture_current_exception(_PyXI_session *session) // Finished! assert(!PyErr_Occurred()); - session->error = err; + session->error.info = err; } static inline void @@ -2549,15 +2697,19 @@ _session_set_error(_PyXI_session *session, _PyXI_errcode errcode) { assert(_session_is_active(session)); assert(PyErr_Occurred()); + if (errcode == _PyXI_ERR_NO_ERROR) { + // We're a bit forgiving here. + errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } if (errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION) { - session->_error_override = errcode; - session->error_override = &session->_error_override; + session->error._override = errcode; + session->error.override = &session->error._override; } _capture_current_exception(session); } static int -_ensure_main_ns(_PyXI_session *session) +_ensure_main_ns(_PyXI_session *session, _PyXI_errcode *p_errcode) { assert(_session_is_active(session)); if (session->main_ns != NULL) { @@ -2566,11 +2718,17 @@ _ensure_main_ns(_PyXI_session *session) // Cache __main__.__dict__. PyObject *main_mod = _Py_GetMainModule(session->init_tstate); if (_Py_CheckMainModule(main_mod) < 0) { + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_MAIN_NS_FAILURE; + } return -1; } PyObject *ns = PyModule_GetDict(main_mod); // borrowed Py_DECREF(main_mod); if (ns == NULL) { + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_MAIN_NS_FAILURE; + } return -1; } session->main_ns = Py_NewRef(ns); @@ -2578,21 +2736,150 @@ _ensure_main_ns(_PyXI_session *session) } PyObject * -_PyXI_GetMainNamespace(_PyXI_session *session) +_PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_errcode *p_errcode) { if (!_session_is_active(session)) { PyErr_SetString(PyExc_RuntimeError, "session not active"); return NULL; } - if (_ensure_main_ns(session) < 0) { - _session_set_error(session, _PyXI_ERR_MAIN_NS_FAILURE); - _capture_current_exception(session); + if (_ensure_main_ns(session, p_errcode) < 0) { return NULL; } return session->main_ns; } +static int +_pop_preserved(_PyXI_session *session, + _PyXI_namespace **p_xidata, PyObject **p_obj, + _PyXI_errcode *p_errcode) +{ + assert(_PyThreadState_GET() == session->init_tstate); // active session + if (session->_preserved == NULL) { + *p_xidata = NULL; + *p_obj = NULL; + return 0; + } + if (session->init_tstate == session->prev_tstate) { + // We did not switch interpreters. + *p_xidata = NULL; + *p_obj = session->_preserved; + session->_preserved = NULL; + return 0; + } + *p_obj = NULL; + + // We did switch interpreters. + Py_ssize_t len = PyDict_Size(session->_preserved); + if (len < 0) { + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; + } + return -1; + } + else if (len == 0) { + *p_xidata = NULL; + } + else { + _PyXI_namespace *xidata = _create_sharedns(session->_preserved); + if (xidata == NULL) { + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; + } + return -1; + } + _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; + if (_fill_sharedns(xidata, session->_preserved, + _PyXIDATA_FULL_FALLBACK, &errcode) < 0) + { + assert(session->error.info == NULL); + if (errcode != _PyXI_ERR_NOT_SHAREABLE) { + errcode = _PyXI_ERR_PRESERVE_FAILURE; + } + if (p_errcode != NULL) { + *p_errcode = errcode; + } + _destroy_sharedns(xidata); + return -1; + } + *p_xidata = xidata; + } + Py_CLEAR(session->_preserved); + return 0; +} + +static int +_finish_preserved(_PyXI_namespace *xidata, PyObject **p_preserved) +{ + if (xidata == NULL) { + return 0; + } + int res = -1; + if (p_preserved != NULL) { + PyObject *ns = PyDict_New(); + if (ns == NULL) { + goto finally; + } + if (_apply_sharedns(xidata, ns, NULL) < 0) { + Py_CLEAR(ns); + goto finally; + } + *p_preserved = ns; + } + res = 0; + +finally: + _destroy_sharedns(xidata); + return res; +} + +int +_PyXI_Preserve(_PyXI_session *session, const char *name, PyObject *value, + _PyXI_errcode *p_errcode) +{ + if (!_session_is_active(session)) { + PyErr_SetString(PyExc_RuntimeError, "session not active"); + return -1; + } + if (session->_preserved == NULL) { + session->_preserved = PyDict_New(); + if (session->_preserved == NULL) { + set_exc_with_cause(PyExc_RuntimeError, + "failed to initialize preserved objects"); + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; + } + return -1; + } + } + if (PyDict_SetItemString(session->_preserved, name, value) < 0) { + set_exc_with_cause(PyExc_RuntimeError, "failed to preserve object"); + if (p_errcode != NULL) { + *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; + } + return -1; + } + return 0; +} + +PyObject * +_PyXI_GetPreserved(_PyXI_session_result *result, const char *name) +{ + PyObject *value = NULL; + if (result->preserved != NULL) { + (void)PyDict_GetItemStringRef(result->preserved, name, &value); + } + return value; +} + +void +_PyXI_ClearResult(_PyXI_session_result *result) +{ + Py_CLEAR(result->preserved); + Py_CLEAR(result->excinfo); +} + + /*********************/ /* runtime lifecycle */ /*********************/ diff --git a/Python/import.c b/Python/import.c index 98557991378e05..184dede335dfd6 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3964,8 +3964,10 @@ PyImport_Import(PyObject *module_name) if (globals != NULL) { Py_INCREF(globals); builtins = PyObject_GetItem(globals, &_Py_ID(__builtins__)); - if (builtins == NULL) + if (builtins == NULL) { + // XXX Fall back to interp->builtins or sys.modules['builtins']? goto err; + } } else { /* No globals -- use standard builtins, and fake globals */ From 1a89991d2362867a9127e151376135615bc92a4c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 30 May 2025 18:52:36 +0300 Subject: [PATCH 422/989] gh-134733: Fix documentation for the show_empty option of ast.dump() (GH-134925) Optional None values are always omitted. --- Doc/library/ast.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index cf22250cac6091..ca0654acb33689 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -2445,8 +2445,9 @@ and classes for traversing abstract syntax trees: indents that many spaces per level. If *indent* is a string (such as ``"\t"``), that string is used to indent each level. - If *show_empty* is ``False`` (the default), empty lists and fields that are ``None`` - will be omitted from the output. + If *show_empty* is false (the default), optional empty lists will be + omitted from the output. + Optional ``None`` values are always omitted. .. versionchanged:: 3.9 Added the *indent* option. From 98a5b830d2463351800f4d76edba1a306a3e0ec9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 30 May 2025 10:46:16 -0700 Subject: [PATCH 423/989] .gitignore personal Claude Code configs (#134942) .gitignore personal Claude Code configs. https://docs.anthropic.com/en/docs/claude-code/memory --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 2a6f249275c32e..a2542eaf7b698a 100644 --- a/.gitignore +++ b/.gitignore @@ -173,3 +173,7 @@ Python/frozen_modules/MANIFEST # main branch only: ABI files are not checked/maintained. Doc/data/python*.abi + +# People's custom https://docs.anthropic.com/en/docs/claude-code/memory configs. +/.claude/ +CLAUDE.local.md From 310c8cd5e5dcb0fb9509e08c0d5cf32075416878 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 30 May 2025 11:28:14 -0700 Subject: [PATCH 424/989] rearrange my gitingore addition w/comment to make backporting easier (#134945) --- .gitignore | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index a2542eaf7b698a..cdb0352e0a8836 100644 --- a/.gitignore +++ b/.gitignore @@ -171,9 +171,10 @@ Python/frozen_modules/MANIFEST /python !/Python/ -# main branch only: ABI files are not checked/maintained. -Doc/data/python*.abi - # People's custom https://docs.anthropic.com/en/docs/claude-code/memory configs. /.claude/ CLAUDE.local.md + +#### main branch only stuff below this line, things to backport go above. #### +# main branch only: ABI files are not checked/maintained. +Doc/data/python*.abi From 8865b4f95b32097099d252111669b88ec7c1eb7f Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 30 May 2025 19:37:29 +0100 Subject: [PATCH 425/989] gh-134923: Use /GENPROFILE and /USEPROFILE for Windows PGO builds (GH-134924) --- .../2025-05-30-11-02-30.gh-issue-134923.gBkRg4.rst | 3 +++ PCbuild/_testclinic_limited.vcxproj | 1 + PCbuild/pyproject.props | 11 ++++------- 3 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-30-11-02-30.gh-issue-134923.gBkRg4.rst diff --git a/Misc/NEWS.d/next/Build/2025-05-30-11-02-30.gh-issue-134923.gBkRg4.rst b/Misc/NEWS.d/next/Build/2025-05-30-11-02-30.gh-issue-134923.gBkRg4.rst new file mode 100644 index 00000000000000..a742a6add8ae93 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-30-11-02-30.gh-issue-134923.gBkRg4.rst @@ -0,0 +1,3 @@ +Windows builds with profile-guided optimization enabled now use +``/GENPROFILE`` and ``/USEPROFILE`` instead of deprecated ``/LTCG:`` +options. diff --git a/PCbuild/_testclinic_limited.vcxproj b/PCbuild/_testclinic_limited.vcxproj index 183a55080e8693..95c205309b1f30 100644 --- a/PCbuild/_testclinic_limited.vcxproj +++ b/PCbuild/_testclinic_limited.vcxproj @@ -70,6 +70,7 @@ {01FDF29A-40A1-46DF-84F5-85EBBD2A2410} _testclinic_limited Win32Proj + false diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 7272542e13a5ca..ce51e342241686 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -96,19 +96,16 @@ MachineX64 MachineARM MachineARM64 - $(OutDir)$(TargetName).pgd - UseLinkTimeCodeGeneration - PGInstrument - PGUpdate + UseLinkTimeCodeGeneration advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;%(AdditionalDependencies) /OPT:REF,NOICF %(AdditionalOptions) -d2:-pattern-opt-disable:-932189325 %(AdditionalOptions) + /GENPROFILE %(AdditionalOptions) + /USEPROFILE %(AdditionalOptions) false - true - true - true + true $(PySourcePath)PC;$(PySourcePath)Include;$(IntDir);%(AdditionalIncludeDirectories) From 8e8786f8986353e20c1c4406c34409a6139fa073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Kiss=20Koll=C3=A1r?= Date: Sat, 31 May 2025 03:32:36 +0300 Subject: [PATCH 426/989] gh-91048: Reorder result tuple of parse_code_object (#134898) Reorder result tuple of parse_code_object The standard followed by APIs like pstat.Stats is to take a file, line, function triplet. The parse_code_object function (and callers exposing this in Python like RemoteUnwinder.get_stack_trace) return function, file, line triplets which requires the caller to reorder these when using it in classes like pstat.Stats. --- Lib/test/test_external_inspection.py | 76 ++++++++++++++-------------- Modules/_remote_debugging_module.c | 8 +-- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 2b4b63a030b1af..303af25fc7a715 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -114,17 +114,17 @@ def foo(): p.wait(timeout=SHORT_TIMEOUT) thread_expected_stack_trace = [ - ("foo", script_name, 15), - ("baz", script_name, 12), - ("bar", script_name, 9), - ('Thread.run', threading.__file__, ANY) + (script_name, 15, "foo"), + (script_name, 12, "baz"), + (script_name, 9, "bar"), + (threading.__file__, ANY, 'Thread.run') ] # Is possible that there are more threads, so we check that the # expected stack traces are in the result (looking at you Windows!) self.assertIn((ANY, thread_expected_stack_trace), stack_trace) # Check that the main thread stack trace is in the result - frame = ("", script_name, 19) + frame = (script_name, 19, "") for _, stack in stack_trace: if frame in stack: break @@ -222,47 +222,47 @@ def new_eager_loop(): root_task = "Task-1" expected_stack_trace = [ [ - ("c5", script_name, 10), - ("c4", script_name, 14), - ("c3", script_name, 17), - ("c2", script_name, 20), + (script_name, 10, "c5"), + (script_name, 14, "c4"), + (script_name, 17, "c3"), + (script_name, 20, "c2"), ], "c2_root", [ [ [ ( - "TaskGroup._aexit", taskgroups.__file__, ANY, + "TaskGroup._aexit" ), ( - "TaskGroup.__aexit__", taskgroups.__file__, ANY, + "TaskGroup.__aexit__" ), - ("main", script_name, 26), + (script_name, 26, "main"), ], "Task-1", [], ], [ - [("c1", script_name, 23)], + [(script_name, 23, "c1")], "sub_main_1", [ [ [ ( - "TaskGroup._aexit", taskgroups.__file__, ANY, + "TaskGroup._aexit" ), ( - "TaskGroup.__aexit__", taskgroups.__file__, ANY, + "TaskGroup.__aexit__" ), - ("main", script_name, 26), + (script_name, 26, "main"), ], "Task-1", [], @@ -270,22 +270,22 @@ def new_eager_loop(): ], ], [ - [("c1", script_name, 23)], + [(script_name, 23, "c1")], "sub_main_2", [ [ [ ( - "TaskGroup._aexit", taskgroups.__file__, ANY, + "TaskGroup._aexit" ), ( - "TaskGroup.__aexit__", taskgroups.__file__, ANY, + "TaskGroup.__aexit__" ), - ("main", script_name, 26), + (script_name, 26, "main"), ], "Task-1", [], @@ -363,9 +363,9 @@ async def main(): expected_stack_trace = [ [ - ("gen_nested_call", script_name, 10), - ("gen", script_name, 16), - ("main", script_name, 19), + (script_name, 10, "gen_nested_call"), + (script_name, 16, "gen"), + (script_name, 19, "main"), ], "Task-1", [], @@ -439,9 +439,9 @@ async def main(): stack_trace[2].sort(key=lambda x: x[1]) expected_stack_trace = [ - [("deep", script_name, 11), ("c1", script_name, 15)], + [(script_name, 11, "deep"), (script_name, 15, "c1")], "Task-2", - [[[("main", script_name, 21)], "Task-1", []]], + [[[(script_name, 21, "main")], "Task-1", []]], ] self.assertEqual(stack_trace, expected_stack_trace) @@ -515,16 +515,16 @@ async def main(): stack_trace[2].sort(key=lambda x: x[1]) expected_stack_trace = [ [ - ("deep", script_name, 11), - ("c1", script_name, 15), - ("staggered_race..run_one_coro", staggered.__file__, ANY), + (script_name, 11, "deep"), + (script_name, 15, "c1"), + (staggered.__file__, ANY, "staggered_race..run_one_coro"), ], "Task-2", [ [ [ - ("staggered_race", staggered.__file__, ANY), - ("main", script_name, 21), + (staggered.__file__, ANY, "staggered_race"), + (script_name, 21, "main"), ], "Task-1", [], @@ -662,16 +662,16 @@ async def main(): self.assertIn((ANY, "Task-1", []), entries) main_stack = [ ( - "TaskGroup._aexit", taskgroups.__file__, ANY, + "TaskGroup._aexit", ), ( - "TaskGroup.__aexit__", taskgroups.__file__, ANY, + "TaskGroup.__aexit__", ), - ("main", script_name, 60), + (script_name, 60, "main"), ] self.assertIn( (ANY, "server task", [[main_stack, ANY]]), @@ -686,16 +686,16 @@ async def main(): [ [ ( - "TaskGroup._aexit", taskgroups.__file__, ANY, + "TaskGroup._aexit", ), ( - "TaskGroup.__aexit__", taskgroups.__file__, ANY, + "TaskGroup.__aexit__", ), - ("echo_client_spam", script_name, 41), + (script_name, 41, "echo_client_spam"), ], ANY, ] @@ -741,14 +741,14 @@ def test_self_trace(self): stack[:2], [ ( - "get_stack_trace", __file__, get_stack_trace.__code__.co_firstlineno + 2, + "get_stack_trace", ), ( - "TestGetStackTrace.test_self_trace", __file__, self.test_self_trace.__code__.co_firstlineno + 6, + "TestGetStackTrace.test_self_trace", ), ] ) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 86e269f31246e5..ea58f38006e199 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1562,9 +1562,9 @@ parse_code_object(RemoteUnwinderObject *unwinder, Py_INCREF(meta->func_name); Py_INCREF(meta->file_name); - PyTuple_SET_ITEM(tuple, 0, meta->func_name); - PyTuple_SET_ITEM(tuple, 1, meta->file_name); - PyTuple_SET_ITEM(tuple, 2, lineno); + PyTuple_SET_ITEM(tuple, 0, meta->file_name); + PyTuple_SET_ITEM(tuple, 1, lineno); + PyTuple_SET_ITEM(tuple, 2, meta->func_name); *result = tuple; return 0; @@ -2921,4 +2921,4 @@ PyMODINIT_FUNC PyInit__remote_debugging(void) { return PyModuleDef_Init(&remote_debugging_module); -} \ No newline at end of file +} From 4d31d19a1df0a6e658e6a320cde8355f5f6ea27b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 31 May 2025 10:32:53 +0300 Subject: [PATCH 427/989] gh-134718: Omit optional Load() values in ast.dump() (GH-134934) --- Doc/library/ast.rst | 296 ++++++++---------- Lib/ast.py | 6 + Lib/test/test_ast/test_ast.py | 71 ++--- ...-05-30-18-13-48.gh-issue-134718.5FEspx.rst | 1 + 4 files changed, 177 insertions(+), 197 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-30-18-13-48.gh-issue-134718.5FEspx.rst diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ca0654acb33689..ef6c62dca1e124 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -252,12 +252,11 @@ Root nodes >>> print(ast.dump(ast.parse('(int, str) -> List[int]', mode='func_type'), indent=4)) FunctionType( argtypes=[ - Name(id='int', ctx=Load()), - Name(id='str', ctx=Load())], + Name(id='int'), + Name(id='str')], returns=Subscript( - value=Name(id='List', ctx=Load()), - slice=Name(id='int', ctx=Load()), - ctx=Load())) + value=Name(id='List'), + slice=Name(id='int'))) .. versionadded:: 3.8 @@ -312,14 +311,14 @@ Literals values=[ Constant(value='sin('), FormattedValue( - value=Name(id='a', ctx=Load()), + value=Name(id='a'), conversion=-1), Constant(value=') is '), FormattedValue( value=Call( - func=Name(id='sin', ctx=Load()), + func=Name(id='sin'), args=[ - Name(id='a', ctx=Load())]), + Name(id='a')]), conversion=-1, format_spec=JoinedStr( values=[ @@ -341,16 +340,14 @@ Literals elts=[ Constant(value=1), Constant(value=2), - Constant(value=3)], - ctx=Load())) + Constant(value=3)])) >>> print(ast.dump(ast.parse('(1, 2, 3)', mode='eval'), indent=4)) Expression( body=Tuple( elts=[ Constant(value=1), Constant(value=2), - Constant(value=3)], - ctx=Load())) + Constant(value=3)])) .. class:: Set(elts) @@ -388,7 +385,7 @@ Literals None], values=[ Constant(value=1), - Name(id='d', ctx=Load())])) + Name(id='d')])) Variables @@ -414,7 +411,7 @@ Variables Module( body=[ Expr( - value=Name(id='a', ctx=Load()))]) + value=Name(id='a'))]) >>> print(ast.dump(ast.parse('a = 1'), indent=4)) Module( @@ -452,7 +449,7 @@ Variables value=Name(id='b', ctx=Store()), ctx=Store())], ctx=Store())], - value=Name(id='it', ctx=Load()))]) + value=Name(id='it'))]) .. _ast-expressions: @@ -475,7 +472,7 @@ Expressions Expr( value=UnaryOp( op=USub(), - operand=Name(id='a', ctx=Load())))]) + operand=Name(id='a')))]) .. class:: UnaryOp(op, operand) @@ -498,7 +495,7 @@ Expressions Expression( body=UnaryOp( op=Not(), - operand=Name(id='x', ctx=Load()))) + operand=Name(id='x'))) .. class:: BinOp(left, op, right) @@ -511,9 +508,9 @@ Expressions >>> print(ast.dump(ast.parse('x + y', mode='eval'), indent=4)) Expression( body=BinOp( - left=Name(id='x', ctx=Load()), + left=Name(id='x'), op=Add(), - right=Name(id='y', ctx=Load()))) + right=Name(id='y'))) .. class:: Add @@ -549,8 +546,8 @@ Expressions body=BoolOp( op=Or(), values=[ - Name(id='x', ctx=Load()), - Name(id='y', ctx=Load())])) + Name(id='x'), + Name(id='y')])) .. class:: And @@ -575,7 +572,7 @@ Expressions LtE(), Lt()], comparators=[ - Name(id='a', ctx=Load()), + Name(id='a'), Constant(value=10)])) @@ -609,18 +606,17 @@ Expressions >>> print(ast.dump(ast.parse('func(a, b=c, *d, **e)', mode='eval'), indent=4)) Expression( body=Call( - func=Name(id='func', ctx=Load()), + func=Name(id='func'), args=[ - Name(id='a', ctx=Load()), + Name(id='a'), Starred( - value=Name(id='d', ctx=Load()), - ctx=Load())], + value=Name(id='d'))], keywords=[ keyword( arg='b', - value=Name(id='c', ctx=Load())), + value=Name(id='c')), keyword( - value=Name(id='e', ctx=Load()))])) + value=Name(id='e'))])) .. class:: keyword(arg, value) @@ -639,9 +635,9 @@ Expressions >>> print(ast.dump(ast.parse('a if b else c', mode='eval'), indent=4)) Expression( body=IfExp( - test=Name(id='b', ctx=Load()), - body=Name(id='a', ctx=Load()), - orelse=Name(id='c', ctx=Load()))) + test=Name(id='b'), + body=Name(id='a'), + orelse=Name(id='c'))) .. class:: Attribute(value, attr, ctx) @@ -656,9 +652,8 @@ Expressions >>> print(ast.dump(ast.parse('snake.colour', mode='eval'), indent=4)) Expression( body=Attribute( - value=Name(id='snake', ctx=Load()), - attr='colour', - ctx=Load())) + value=Name(id='snake'), + attr='colour')) .. class:: NamedExpr(target, value) @@ -694,15 +689,13 @@ Subscripting >>> print(ast.dump(ast.parse('l[1:2, 3]', mode='eval'), indent=4)) Expression( body=Subscript( - value=Name(id='l', ctx=Load()), + value=Name(id='l'), slice=Tuple( elts=[ Slice( lower=Constant(value=1), upper=Constant(value=2)), - Constant(value=3)], - ctx=Load()), - ctx=Load())) + Constant(value=3)]))) .. class:: Slice(lower, upper, step) @@ -716,11 +709,10 @@ Subscripting >>> print(ast.dump(ast.parse('l[1:2]', mode='eval'), indent=4)) Expression( body=Subscript( - value=Name(id='l', ctx=Load()), + value=Name(id='l'), slice=Slice( lower=Constant(value=1), - upper=Constant(value=2)), - ctx=Load())) + upper=Constant(value=2)))) Comprehensions @@ -745,11 +737,11 @@ Comprehensions ... )) Expression( body=ListComp( - elt=Name(id='x', ctx=Load()), + elt=Name(id='x'), generators=[ comprehension( target=Name(id='x', ctx=Store()), - iter=Name(id='numbers', ctx=Load()), + iter=Name(id='numbers'), is_async=0)])) >>> print(ast.dump( ... ast.parse('{x: x**2 for x in numbers}', mode='eval'), @@ -757,15 +749,15 @@ Comprehensions ... )) Expression( body=DictComp( - key=Name(id='x', ctx=Load()), + key=Name(id='x'), value=BinOp( - left=Name(id='x', ctx=Load()), + left=Name(id='x'), op=Pow(), right=Constant(value=2)), generators=[ comprehension( target=Name(id='x', ctx=Store()), - iter=Name(id='numbers', ctx=Load()), + iter=Name(id='numbers'), is_async=0)])) >>> print(ast.dump( ... ast.parse('{x for x in numbers}', mode='eval'), @@ -773,11 +765,11 @@ Comprehensions ... )) Expression( body=SetComp( - elt=Name(id='x', ctx=Load()), + elt=Name(id='x'), generators=[ comprehension( target=Name(id='x', ctx=Store()), - iter=Name(id='numbers', ctx=Load()), + iter=Name(id='numbers'), is_async=0)])) @@ -798,17 +790,17 @@ Comprehensions Expression( body=ListComp( elt=Call( - func=Name(id='ord', ctx=Load()), + func=Name(id='ord'), args=[ - Name(id='c', ctx=Load())]), + Name(id='c')]), generators=[ comprehension( target=Name(id='line', ctx=Store()), - iter=Name(id='file', ctx=Load()), + iter=Name(id='file'), is_async=0), comprehension( target=Name(id='c', ctx=Store()), - iter=Name(id='line', ctx=Load()), + iter=Name(id='line'), is_async=0)])) >>> print(ast.dump(ast.parse('(n**2 for n in it if n>5 if n<10)', mode='eval'), @@ -816,22 +808,22 @@ Comprehensions Expression( body=GeneratorExp( elt=BinOp( - left=Name(id='n', ctx=Load()), + left=Name(id='n'), op=Pow(), right=Constant(value=2)), generators=[ comprehension( target=Name(id='n', ctx=Store()), - iter=Name(id='it', ctx=Load()), + iter=Name(id='it'), ifs=[ Compare( - left=Name(id='n', ctx=Load()), + left=Name(id='n'), ops=[ Gt()], comparators=[ Constant(value=5)]), Compare( - left=Name(id='n', ctx=Load()), + left=Name(id='n'), ops=[ Lt()], comparators=[ @@ -842,11 +834,11 @@ Comprehensions ... indent=4)) # Async comprehension Expression( body=ListComp( - elt=Name(id='i', ctx=Load()), + elt=Name(id='i'), generators=[ comprehension( target=Name(id='i', ctx=Store()), - iter=Name(id='soc', ctx=Load()), + iter=Name(id='soc'), is_async=1)])) @@ -888,7 +880,7 @@ Statements Name(id='a', ctx=Store()), Name(id='b', ctx=Store())], ctx=Store())], - value=Name(id='c', ctx=Load()))]) + value=Name(id='c'))]) .. class:: AnnAssign(target, annotation, value, simple) @@ -911,7 +903,7 @@ Statements body=[ AnnAssign( target=Name(id='c', ctx=Store()), - annotation=Name(id='int', ctx=Load()), + annotation=Name(id='int'), simple=1)]) >>> print(ast.dump(ast.parse('(a): int = 1'), indent=4)) # Annotation with parenthesis @@ -919,7 +911,7 @@ Statements body=[ AnnAssign( target=Name(id='a', ctx=Store()), - annotation=Name(id='int', ctx=Load()), + annotation=Name(id='int'), value=Constant(value=1), simple=0)]) @@ -928,10 +920,10 @@ Statements body=[ AnnAssign( target=Attribute( - value=Name(id='a', ctx=Load()), + value=Name(id='a'), attr='b', ctx=Store()), - annotation=Name(id='int', ctx=Load()), + annotation=Name(id='int'), simple=0)]) >>> print(ast.dump(ast.parse('a[1]: int'), indent=4)) # Subscript annotation @@ -939,10 +931,10 @@ Statements body=[ AnnAssign( target=Subscript( - value=Name(id='a', ctx=Load()), + value=Name(id='a'), slice=Constant(value=1), ctx=Store()), - annotation=Name(id='int', ctx=Load()), + annotation=Name(id='int'), simple=0)]) @@ -979,8 +971,8 @@ Statements Module( body=[ Raise( - exc=Name(id='x', ctx=Load()), - cause=Name(id='y', ctx=Load()))]) + exc=Name(id='x'), + cause=Name(id='y'))]) .. class:: Assert(test, msg) @@ -994,8 +986,8 @@ Statements Module( body=[ Assert( - test=Name(id='x', ctx=Load()), - msg=Name(id='y', ctx=Load()))]) + test=Name(id='x'), + msg=Name(id='y'))]) .. class:: Delete(targets) @@ -1041,7 +1033,7 @@ Statements body=[ TypeAlias( name=Name(id='Alias', ctx=Store()), - value=Name(id='int', ctx=Load()))]) + value=Name(id='int'))]) .. versionadded:: 3.12 @@ -1134,13 +1126,13 @@ Control flow Module( body=[ If( - test=Name(id='x', ctx=Load()), + test=Name(id='x'), body=[ Expr( value=Constant(value=Ellipsis))], orelse=[ If( - test=Name(id='y', ctx=Load()), + test=Name(id='y'), body=[ Expr( value=Constant(value=Ellipsis))], @@ -1174,7 +1166,7 @@ Control flow body=[ For( target=Name(id='x', ctx=Store()), - iter=Name(id='y', ctx=Load()), + iter=Name(id='y'), body=[ Expr( value=Constant(value=Ellipsis))], @@ -1199,7 +1191,7 @@ Control flow Module( body=[ While( - test=Name(id='x', ctx=Load()), + test=Name(id='x'), body=[ Expr( value=Constant(value=Ellipsis))], @@ -1227,11 +1219,11 @@ Control flow body=[ For( target=Name(id='a', ctx=Store()), - iter=Name(id='b', ctx=Load()), + iter=Name(id='b'), body=[ If( test=Compare( - left=Name(id='a', ctx=Load()), + left=Name(id='a'), ops=[ Gt()], comparators=[ @@ -1269,12 +1261,12 @@ Control flow value=Constant(value=Ellipsis))], handlers=[ ExceptHandler( - type=Name(id='Exception', ctx=Load()), + type=Name(id='Exception'), body=[ Expr( value=Constant(value=Ellipsis))]), ExceptHandler( - type=Name(id='OtherException', ctx=Load()), + type=Name(id='OtherException'), name='e', body=[ Expr( @@ -1309,7 +1301,7 @@ Control flow value=Constant(value=Ellipsis))], handlers=[ ExceptHandler( - type=Name(id='Exception', ctx=Load()), + type=Name(id='Exception'), body=[ Expr( value=Constant(value=Ellipsis))])])]) @@ -1337,12 +1329,12 @@ Control flow body=[ Expr( value=BinOp( - left=Name(id='a', ctx=Load()), + left=Name(id='a'), op=Add(), right=Constant(value=1)))], handlers=[ ExceptHandler( - type=Name(id='TypeError', ctx=Load()), + type=Name(id='TypeError'), body=[ Pass()])])]) @@ -1375,18 +1367,18 @@ Control flow With( items=[ withitem( - context_expr=Name(id='a', ctx=Load()), + context_expr=Name(id='a'), optional_vars=Name(id='b', ctx=Store())), withitem( - context_expr=Name(id='c', ctx=Load()), + context_expr=Name(id='c'), optional_vars=Name(id='d', ctx=Store()))], body=[ Expr( value=Call( - func=Name(id='something', ctx=Load()), + func=Name(id='something'), args=[ - Name(id='b', ctx=Load()), - Name(id='d', ctx=Load())]))])]) + Name(id='b'), + Name(id='d')]))])]) Pattern matching @@ -1426,14 +1418,14 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchSequence( patterns=[ MatchAs(name='x')]), guard=Compare( - left=Name(id='x', ctx=Load()), + left=Name(id='x'), ops=[ Gt()], comparators=[ @@ -1443,7 +1435,7 @@ Pattern matching value=Constant(value=Ellipsis))]), match_case( pattern=MatchClass( - cls=Name(id='tuple', ctx=Load())), + cls=Name(id='tuple')), body=[ Expr( value=Constant(value=Ellipsis))])])]) @@ -1467,7 +1459,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchValue( @@ -1494,7 +1486,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchSingleton(value=None), @@ -1521,7 +1513,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchSequence( @@ -1554,7 +1546,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchSequence( @@ -1603,7 +1595,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchMapping( @@ -1653,11 +1645,11 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchClass( - cls=Name(id='Point2D', ctx=Load()), + cls=Name(id='Point2D'), patterns=[ MatchValue( value=Constant(value=0)), @@ -1668,7 +1660,7 @@ Pattern matching value=Constant(value=Ellipsis))]), match_case( pattern=MatchClass( - cls=Name(id='Point3D', ctx=Load()), + cls=Name(id='Point3D'), kwd_attrs=[ 'x', 'y', @@ -1709,7 +1701,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchAs( @@ -1746,7 +1738,7 @@ Pattern matching Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchOr( @@ -1786,7 +1778,7 @@ Type annotations body=[ AnnAssign( target=Name(id='x', ctx=Store()), - annotation=Name(id='bool', ctx=Load()), + annotation=Name(id='bool'), value=Constant(value=1), simple=1)], type_ignores=[ @@ -1824,12 +1816,11 @@ aliases. type_params=[ TypeVar( name='T', - bound=Name(id='int', ctx=Load()), - default_value=Name(id='bool', ctx=Load()))], + bound=Name(id='int'), + default_value=Name(id='bool'))], value=Subscript( - value=Name(id='list', ctx=Load()), - slice=Name(id='T', ctx=Load()), - ctx=Load()))]) + value=Name(id='list'), + slice=Name(id='T')))]) .. versionadded:: 3.12 @@ -1854,17 +1845,14 @@ aliases. name='P', default_value=List( elts=[ - Name(id='int', ctx=Load()), - Name(id='str', ctx=Load())], - ctx=Load()))], + Name(id='int'), + Name(id='str')]))], value=Subscript( - value=Name(id='Callable', ctx=Load()), + value=Name(id='Callable'), slice=Tuple( elts=[ - Name(id='P', ctx=Load()), - Name(id='int', ctx=Load())], - ctx=Load()), - ctx=Load()))]) + Name(id='P'), + Name(id='int')])))]) .. versionadded:: 3.12 @@ -1885,18 +1873,13 @@ aliases. TypeAlias( name=Name(id='Alias', ctx=Store()), type_params=[ - TypeVarTuple( - name='Ts', - default_value=Tuple(ctx=Load()))], + TypeVarTuple(name='Ts', default_value=Tuple())], value=Subscript( - value=Name(id='tuple', ctx=Load()), + value=Name(id='tuple'), slice=Tuple( elts=[ Starred( - value=Name(id='Ts', ctx=Load()), - ctx=Load())], - ctx=Load()), - ctx=Load()))]) + value=Name(id='Ts'))])))]) .. versionadded:: 3.12 @@ -2001,8 +1984,8 @@ Function and class definitions body=[ Pass()], decorator_list=[ - Name(id='decorator1', ctx=Load()), - Name(id='decorator2', ctx=Load())], + Name(id='decorator1'), + Name(id='decorator2')], returns=Constant(value='return annotation'))]) @@ -2032,14 +2015,14 @@ Function and class definitions body=[ Expr( value=Yield( - value=Name(id='x', ctx=Load())))]) + value=Name(id='x')))]) >>> print(ast.dump(ast.parse('yield from x'), indent=4)) Module( body=[ Expr( value=YieldFrom( - value=Name(id='x', ctx=Load())))]) + value=Name(id='x')))]) .. class:: Global(names) @@ -2094,17 +2077,17 @@ Function and class definitions ClassDef( name='Foo', bases=[ - Name(id='base1', ctx=Load()), - Name(id='base2', ctx=Load())], + Name(id='base1'), + Name(id='base2')], keywords=[ keyword( arg='metaclass', - value=Name(id='meta', ctx=Load()))], + value=Name(id='meta'))], body=[ Pass()], decorator_list=[ - Name(id='decorator1', ctx=Load()), - Name(id='decorator2', ctx=Load())])]) + Name(id='decorator1'), + Name(id='decorator2')])]) .. versionchanged:: 3.12 Added ``type_params``. @@ -2141,7 +2124,7 @@ Async and await Expr( value=Await( value=Call( - func=Name(id='other_func', ctx=Load()))))])]) + func=Name(id='other_func'))))])]) .. class:: AsyncFor(target, iter, body, orelse, type_comment) @@ -2402,7 +2385,7 @@ and classes for traversing abstract syntax trees: def visit_Name(self, node): return Subscript( - value=Name(id='data', ctx=Load()), + value=Name(id='data'), slice=Constant(value=node.id), ctx=node.ctx ) @@ -2445,42 +2428,35 @@ and classes for traversing abstract syntax trees: indents that many spaces per level. If *indent* is a string (such as ``"\t"``), that string is used to indent each level. - If *show_empty* is false (the default), optional empty lists will be - omitted from the output. + If *show_empty* is false (the default), optional empty lists and + ``Load()`` values will be omitted from the output. Optional ``None`` values are always omitted. + .. doctest:: + + >>> tree = ast.parse('print(None)', '?', 'eval') + >>> print(ast.dump(tree, indent=4)) + Expression( + body=Call( + func=Name(id='print'), + args=[ + Constant(value=None)])) + >>> print(ast.dump(tree, indent=4, show_empty=True)) + Expression( + body=Call( + func=Name(id='print', ctx=Load()), + args=[ + Constant(value=None)], + keywords=[])) + .. versionchanged:: 3.9 Added the *indent* option. .. versionchanged:: 3.13 Added the *show_empty* option. - .. doctest:: - - >>> print(ast.dump(ast.parse("""\ - ... async def f(): - ... await other_func() - ... """), indent=4, show_empty=True)) - Module( - body=[ - AsyncFunctionDef( - name='f', - args=arguments( - posonlyargs=[], - args=[], - kwonlyargs=[], - kw_defaults=[], - defaults=[]), - body=[ - Expr( - value=Await( - value=Call( - func=Name(id='other_func', ctx=Load()), - args=[], - keywords=[])))], - decorator_list=[], - type_params=[])], - type_ignores=[]) + .. versionchanged:: next + Omit optional ``Load()`` values by default. .. _ast-compiler-flags: diff --git a/Lib/ast.py b/Lib/ast.py index 2f11683ecf7c68..6d3daf64f5c6d7 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -154,6 +154,12 @@ def _format(node, level=0): if not keywords: args_buffer.append(repr(value)) continue + elif isinstance(value, Load): + field_type = cls._field_types.get(name, object) + if field_type is expr_context: + if not keywords: + args_buffer.append(repr(value)) + continue if not keywords: args.extend(args_buffer) args_buffer = [] diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 59263012bc1440..cc46529c0ef105 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -1372,17 +1372,17 @@ def test_parse_in_error(self): def test_dump(self): node = ast.parse('spam(eggs, "and cheese")') self.assertEqual(ast.dump(node), - "Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), " - "args=[Name(id='eggs', ctx=Load()), Constant(value='and cheese')]))])" + "Module(body=[Expr(value=Call(func=Name(id='spam'), " + "args=[Name(id='eggs'), Constant(value='and cheese')]))])" ) self.assertEqual(ast.dump(node, annotate_fields=False), - "Module([Expr(Call(Name('spam', Load()), [Name('eggs', Load()), " + "Module([Expr(Call(Name('spam'), [Name('eggs'), " "Constant('and cheese')]))])" ) self.assertEqual(ast.dump(node, include_attributes=True), - "Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load(), " + "Module(body=[Expr(value=Call(func=Name(id='spam', " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=4), " - "args=[Name(id='eggs', ctx=Load(), lineno=1, col_offset=5, " + "args=[Name(id='eggs', lineno=1, col_offset=5, " "end_lineno=1, end_col_offset=9), Constant(value='and cheese', " "lineno=1, col_offset=11, end_lineno=1, end_col_offset=23)], " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=24), " @@ -1396,18 +1396,18 @@ def test_dump_indent(self): body=[ Expr( value=Call( - func=Name(id='spam', ctx=Load()), + func=Name(id='spam'), args=[ - Name(id='eggs', ctx=Load()), + Name(id='eggs'), Constant(value='and cheese')]))])""") self.assertEqual(ast.dump(node, annotate_fields=False, indent='\t'), """\ Module( \t[ \t\tExpr( \t\t\tCall( -\t\t\t\tName('spam', Load()), +\t\t\t\tName('spam'), \t\t\t\t[ -\t\t\t\t\tName('eggs', Load()), +\t\t\t\t\tName('eggs'), \t\t\t\t\tConstant('and cheese')]))])""") self.assertEqual(ast.dump(node, include_attributes=True, indent=3), """\ Module( @@ -1416,7 +1416,6 @@ def test_dump_indent(self): value=Call( func=Name( id='spam', - ctx=Load(), lineno=1, col_offset=0, end_lineno=1, @@ -1424,7 +1423,6 @@ def test_dump_indent(self): args=[ Name( id='eggs', - ctx=Load(), lineno=1, col_offset=5, end_lineno=1, @@ -1454,23 +1452,23 @@ def test_dump_incomplete(self): ) node = ast.Raise(exc=ast.Name(id='e', ctx=ast.Load()), lineno=3, col_offset=4) self.assertEqual(ast.dump(node), - "Raise(exc=Name(id='e', ctx=Load()))" + "Raise(exc=Name(id='e'))" ) self.assertEqual(ast.dump(node, annotate_fields=False), - "Raise(Name('e', Load()))" + "Raise(Name('e'))" ) self.assertEqual(ast.dump(node, include_attributes=True), - "Raise(exc=Name(id='e', ctx=Load()), lineno=3, col_offset=4)" + "Raise(exc=Name(id='e'), lineno=3, col_offset=4)" ) self.assertEqual(ast.dump(node, annotate_fields=False, include_attributes=True), - "Raise(Name('e', Load()), lineno=3, col_offset=4)" + "Raise(Name('e'), lineno=3, col_offset=4)" ) node = ast.Raise(cause=ast.Name(id='e', ctx=ast.Load())) self.assertEqual(ast.dump(node), - "Raise(cause=Name(id='e', ctx=Load()))" + "Raise(cause=Name(id='e'))" ) self.assertEqual(ast.dump(node, annotate_fields=False), - "Raise(cause=Name('e', Load()))" + "Raise(cause=Name('e'))" ) # Arguments: node = ast.arguments(args=[ast.arg("x")]) @@ -1502,10 +1500,10 @@ def test_dump_incomplete(self): [ast.Name('dataclass', ctx=ast.Load())], ) self.assertEqual(ast.dump(node), - "ClassDef(name='T', keywords=[keyword(arg='a', value=Constant(value=None))], decorator_list=[Name(id='dataclass', ctx=Load())])", + "ClassDef(name='T', keywords=[keyword(arg='a', value=Constant(value=None))], decorator_list=[Name(id='dataclass')])", ) self.assertEqual(ast.dump(node, annotate_fields=False), - "ClassDef('T', [], [keyword('a', Constant(None))], [], [Name('dataclass', Load())])", + "ClassDef('T', [], [keyword('a', Constant(None))], [], [Name('dataclass')])", ) def test_dump_show_empty(self): @@ -1533,7 +1531,7 @@ def check_text(code, empty, full, **kwargs): check_node( # Corner case: there are no real `Name` instances with `id=''`: ast.Name(id='', ctx=ast.Load()), - empty="Name(id='', ctx=Load())", + empty="Name(id='')", full="Name(id='', ctx=Load())", ) @@ -1581,26 +1579,26 @@ def check_text(code, empty, full, **kwargs): check_text( "def a(b: int = 0, *, c): ...", - empty="Module(body=[FunctionDef(name='a', args=arguments(args=[arg(arg='b', annotation=Name(id='int', ctx=Load()))], kwonlyargs=[arg(arg='c')], kw_defaults=[None], defaults=[Constant(value=0)]), body=[Expr(value=Constant(value=Ellipsis))])])", + empty="Module(body=[FunctionDef(name='a', args=arguments(args=[arg(arg='b', annotation=Name(id='int'))], kwonlyargs=[arg(arg='c')], kw_defaults=[None], defaults=[Constant(value=0)]), body=[Expr(value=Constant(value=Ellipsis))])])", full="Module(body=[FunctionDef(name='a', args=arguments(posonlyargs=[], args=[arg(arg='b', annotation=Name(id='int', ctx=Load()))], kwonlyargs=[arg(arg='c')], kw_defaults=[None], defaults=[Constant(value=0)]), body=[Expr(value=Constant(value=Ellipsis))], decorator_list=[], type_params=[])], type_ignores=[])", ) check_text( "def a(b: int = 0, *, c): ...", - empty="Module(body=[FunctionDef(name='a', args=arguments(args=[arg(arg='b', annotation=Name(id='int', ctx=Load(), lineno=1, col_offset=9, end_lineno=1, end_col_offset=12), lineno=1, col_offset=6, end_lineno=1, end_col_offset=12)], kwonlyargs=[arg(arg='c', lineno=1, col_offset=21, end_lineno=1, end_col_offset=22)], kw_defaults=[None], defaults=[Constant(value=0, lineno=1, col_offset=15, end_lineno=1, end_col_offset=16)]), body=[Expr(value=Constant(value=Ellipsis, lineno=1, col_offset=25, end_lineno=1, end_col_offset=28), lineno=1, col_offset=25, end_lineno=1, end_col_offset=28)], lineno=1, col_offset=0, end_lineno=1, end_col_offset=28)])", + empty="Module(body=[FunctionDef(name='a', args=arguments(args=[arg(arg='b', annotation=Name(id='int', lineno=1, col_offset=9, end_lineno=1, end_col_offset=12), lineno=1, col_offset=6, end_lineno=1, end_col_offset=12)], kwonlyargs=[arg(arg='c', lineno=1, col_offset=21, end_lineno=1, end_col_offset=22)], kw_defaults=[None], defaults=[Constant(value=0, lineno=1, col_offset=15, end_lineno=1, end_col_offset=16)]), body=[Expr(value=Constant(value=Ellipsis, lineno=1, col_offset=25, end_lineno=1, end_col_offset=28), lineno=1, col_offset=25, end_lineno=1, end_col_offset=28)], lineno=1, col_offset=0, end_lineno=1, end_col_offset=28)])", full="Module(body=[FunctionDef(name='a', args=arguments(posonlyargs=[], args=[arg(arg='b', annotation=Name(id='int', ctx=Load(), lineno=1, col_offset=9, end_lineno=1, end_col_offset=12), lineno=1, col_offset=6, end_lineno=1, end_col_offset=12)], kwonlyargs=[arg(arg='c', lineno=1, col_offset=21, end_lineno=1, end_col_offset=22)], kw_defaults=[None], defaults=[Constant(value=0, lineno=1, col_offset=15, end_lineno=1, end_col_offset=16)]), body=[Expr(value=Constant(value=Ellipsis, lineno=1, col_offset=25, end_lineno=1, end_col_offset=28), lineno=1, col_offset=25, end_lineno=1, end_col_offset=28)], decorator_list=[], type_params=[], lineno=1, col_offset=0, end_lineno=1, end_col_offset=28)], type_ignores=[])", include_attributes=True, ) check_text( 'spam(eggs, "and cheese")', - empty="Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), args=[Name(id='eggs', ctx=Load()), Constant(value='and cheese')]))])", + empty="Module(body=[Expr(value=Call(func=Name(id='spam'), args=[Name(id='eggs'), Constant(value='and cheese')]))])", full="Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), args=[Name(id='eggs', ctx=Load()), Constant(value='and cheese')], keywords=[]))], type_ignores=[])", ) check_text( 'spam(eggs, text="and cheese")', - empty="Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), args=[Name(id='eggs', ctx=Load())], keywords=[keyword(arg='text', value=Constant(value='and cheese'))]))])", + empty="Module(body=[Expr(value=Call(func=Name(id='spam'), args=[Name(id='eggs')], keywords=[keyword(arg='text', value=Constant(value='and cheese'))]))])", full="Module(body=[Expr(value=Call(func=Name(id='spam', ctx=Load()), args=[Name(id='eggs', ctx=Load())], keywords=[keyword(arg='text', value=Constant(value='and cheese'))]))], type_ignores=[])", ) @@ -1634,12 +1632,12 @@ def test_fix_missing_locations(self): self.assertEqual(src, ast.fix_missing_locations(src)) self.maxDiff = None self.assertEqual(ast.dump(src, include_attributes=True), - "Module(body=[Expr(value=Call(func=Name(id='write', ctx=Load(), " + "Module(body=[Expr(value=Call(func=Name(id='write', " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=5), " "args=[Constant(value='spam', lineno=1, col_offset=6, end_lineno=1, " "end_col_offset=12)], lineno=1, col_offset=0, end_lineno=1, " "end_col_offset=13), lineno=1, col_offset=0, end_lineno=1, " - "end_col_offset=13), Expr(value=Call(func=Name(id='spam', ctx=Load(), " + "end_col_offset=13), Expr(value=Call(func=Name(id='spam', " "lineno=1, col_offset=0, end_lineno=1, end_col_offset=0), " "args=[Constant(value='eggs', lineno=1, col_offset=0, end_lineno=1, " "end_col_offset=0)], lineno=1, col_offset=0, end_lineno=1, " @@ -3359,7 +3357,7 @@ def test_exec_mode_flag(self): body=[ AnnAssign( target=Name(id='x', ctx=Store()), - annotation=Name(id='bool', ctx=Load()), + annotation=Name(id='bool'), value=Constant(value=1), simple=1)], type_ignores=[ @@ -3387,7 +3385,7 @@ def test_eval_mode_flag(self): expect = ''' Expression( body=Call( - func=Name(id='print', ctx=Load()), + func=Name(id='print'), args=[ Constant(value=1), Constant(value=2), @@ -3403,12 +3401,11 @@ def test_func_type_mode_flag(self): expect = ''' FunctionType( argtypes=[ - Name(id='int', ctx=Load()), - Name(id='str', ctx=Load())], + Name(id='int'), + Name(id='str')], returns=Subscript( - value=Name(id='list', ctx=Load()), - slice=Name(id='int', ctx=Load()), - ctx=Load())) + value=Name(id='list'), + slice=Name(id='int'))) ''' for flag in ('-m=func_type', '--mode=func_type'): with self.subTest(flag=flag): @@ -3422,7 +3419,7 @@ def test_no_type_comments_flag(self): body=[ AnnAssign( target=Name(id='x', ctx=Store()), - annotation=Name(id='bool', ctx=Load()), + annotation=Name(id='bool'), value=Constant(value=1), simple=1)]) ''' @@ -3467,7 +3464,7 @@ def test_feature_version_flag(self): Module( body=[ Match( - subject=Name(id='x', ctx=Load()), + subject=Name(id='x'), cases=[ match_case( pattern=MatchValue( @@ -3490,7 +3487,7 @@ def test_no_optimize_flag(self): Module( body=[ Match( - subject=Name(id='a', ctx=Load()), + subject=Name(id='a'), cases=[ match_case( pattern=MatchValue( @@ -3516,7 +3513,7 @@ def test_optimize_flag(self): Module( body=[ Match( - subject=Name(id='a', ctx=Load()), + subject=Name(id='a'), cases=[ match_case( pattern=MatchValue( diff --git a/Misc/NEWS.d/next/Library/2025-05-30-18-13-48.gh-issue-134718.5FEspx.rst b/Misc/NEWS.d/next/Library/2025-05-30-18-13-48.gh-issue-134718.5FEspx.rst new file mode 100644 index 00000000000000..06c1d5583be0bb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-30-18-13-48.gh-issue-134718.5FEspx.rst @@ -0,0 +1 @@ +By default, omit optional ``Load()`` values in :func:`ast.dump`. From c6e63d9d351f6d952000ec3bf84b3a7607989f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 31 May 2025 09:37:47 +0200 Subject: [PATCH 428/989] gh-134696: align OpenSSL and HACL*-based hash functions constructors AC signatures (#134713) OpenSSL and HACL*-based hash functions constructors now support both `data` and `string` parameters. Previously these constructor functions inconsistently supported sometimes `data` and sometimes `string`, while the documentation expected `data` to be given in all cases. --- Lib/hashlib.py | 12 +- Lib/test/test_hashlib.py | 62 ++- ...-05-26-14-04-39.gh-issue-134696.P04xUa.rst | 5 + Modules/_hashopenssl.c | 153 +++--- Modules/blake2module.c | 30 +- Modules/clinic/_hashopenssl.c.h | 441 +++++++++++------- Modules/clinic/blake2module.c.h | 98 ++-- Modules/clinic/md5module.c.h | 34 +- Modules/clinic/sha1module.c.h | 34 +- Modules/clinic/sha2module.c.h | 130 ++++-- Modules/clinic/sha3module.c.h | 128 ++++- Modules/hashlib.h | 29 ++ Modules/md5module.c | 13 +- Modules/sha1module.c | 12 +- Modules/sha2module.c | 49 +- Modules/sha3module.c | 21 +- 16 files changed, 830 insertions(+), 421 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-26-14-04-39.gh-issue-134696.P04xUa.rst diff --git a/Lib/hashlib.py b/Lib/hashlib.py index abacac22ea0106..0e9bd98aa1fc31 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -141,29 +141,29 @@ def __get_openssl_constructor(name): return __get_builtin_constructor(name) -def __py_new(name, data=b'', **kwargs): +def __py_new(name, *args, **kwargs): """new(name, data=b'', **kwargs) - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) -def __hash_new(name, data=b'', **kwargs): +def __hash_new(name, *args, **kwargs): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ if name in __block_openssl_constructor: # Prefer our builtin blake2 implementation. - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) try: - return _hashlib.new(name, data, **kwargs) + return _hashlib.new(name, *args, **kwargs) except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. # This allows for SHA224/256 and SHA384/512 support even though # the OpenSSL library prior to 0.9.8 doesn't provide them. - return __get_builtin_constructor(name)(data) + return __get_builtin_constructor(name)(*args, **kwargs) try: diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 161c7652d7ab11..51b82fe3b516b5 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -12,6 +12,7 @@ import itertools import logging import os +import re import sys import sysconfig import tempfile @@ -140,11 +141,10 @@ def __init__(self, *args, **kwargs): # of hashlib.new given the algorithm name. for algorithm, constructors in self.constructors_to_test.items(): constructors.add(getattr(hashlib, algorithm)) - def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm, **kwargs): - if data is None: - return hashlib.new(_alg, **kwargs) - return hashlib.new(_alg, data, **kwargs) - constructors.add(_test_algorithm_via_hashlib_new) + def c(*args, __algorithm_name=algorithm, **kwargs): + return hashlib.new(__algorithm_name, *args, **kwargs) + c.__name__ = f'do_test_algorithm_via_hashlib_new_{algorithm}' + constructors.add(c) _hashlib = self._conditional_import_module('_hashlib') self._hashlib = _hashlib @@ -249,6 +249,56 @@ def test_usedforsecurity_false(self): self._hashlib.new("md5", usedforsecurity=False) self._hashlib.openssl_md5(usedforsecurity=False) + def test_clinic_signature(self): + for constructor in self.hash_constructors: + with self.subTest(constructor.__name__): + constructor(b'') + constructor(data=b'') + constructor(string=b'') # should be deprecated in the future + + def test_clinic_signature_errors(self): + nomsg = b'' + mymsg = b'msg' + conflicting_call = re.escape( + "'data' and 'string' are mutually exclusive " + "and support for 'string' keyword parameter " + "is slated for removal in a future version." + ) + duplicated_param = re.escape("given by name ('data') and position") + unexpected_param = re.escape("got an unexpected keyword argument '_'") + for args, kwds, errmsg in [ + # Reject duplicated arguments before unknown keyword arguments. + ((nomsg,), dict(data=nomsg, _=nomsg), duplicated_param), + ((mymsg,), dict(data=nomsg, _=nomsg), duplicated_param), + # Reject duplicated arguments before conflicting ones. + *itertools.product( + [[nomsg], [mymsg]], + [dict(data=nomsg), dict(data=nomsg, string=nomsg)], + [duplicated_param] + ), + # Reject unknown keyword arguments before conflicting ones. + *itertools.product( + [()], + [ + dict(_=None), + dict(data=nomsg, _=None), + dict(string=nomsg, _=None), + dict(string=nomsg, data=nomsg, _=None), + ], + [unexpected_param] + ), + ((nomsg,), dict(_=None), unexpected_param), + ((mymsg,), dict(_=None), unexpected_param), + # Reject conflicting arguments. + [(nomsg,), dict(string=nomsg), conflicting_call], + [(mymsg,), dict(string=nomsg), conflicting_call], + [(), dict(data=nomsg, string=nomsg), conflicting_call], + ]: + for constructor in self.hash_constructors: + with self.subTest(constructor.__name__, args=args, kwds=kwds): + with self.assertRaisesRegex(TypeError, errmsg): + constructor(*args, **kwds) + def test_unknown_hash(self): self.assertRaises(ValueError, hashlib.new, 'spam spam spam spam spam') self.assertRaises(TypeError, hashlib.new, 1) @@ -718,8 +768,6 @@ def check_blake2(self, constructor, salt_size, person_size, key_size, self.assertRaises(ValueError, constructor, node_offset=-1) self.assertRaises(OverflowError, constructor, node_offset=max_offset+1) - self.assertRaises(TypeError, constructor, data=b'') - self.assertRaises(TypeError, constructor, string=b'') self.assertRaises(TypeError, constructor, '') constructor( diff --git a/Misc/NEWS.d/next/Library/2025-05-26-14-04-39.gh-issue-134696.P04xUa.rst b/Misc/NEWS.d/next/Library/2025-05-26-14-04-39.gh-issue-134696.P04xUa.rst new file mode 100644 index 00000000000000..282eb088b89503 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-26-14-04-39.gh-issue-134696.P04xUa.rst @@ -0,0 +1,5 @@ +Built-in HACL* and OpenSSL implementations of hash function constructors +now correctly accept the same *documented* named arguments. For instance, +:func:`~hashlib.md5` could be previously invoked as ``md5(data=data)`` +or ``md5(string=string)`` depending on the underlying implementation +but these calls were not compatible. Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index dab0bb9b67fa00..331275076d7937 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -1039,6 +1039,14 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, return (PyObject *)self; } +#define CALL_HASHLIB_NEW(MODULE, NAME, DATA, STRING, USEDFORSECURITY) \ + do { \ + PyObject *data_obj; \ + if (_Py_hashlib_data_argument(&data_obj, DATA, STRING) < 0) { \ + return NULL; \ + } \ + return _hashlib_HASH(MODULE, NAME, data_obj, USEDFORSECURITY); \ + } while (0) /* The module-level function: new() */ @@ -1046,9 +1054,10 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, _hashlib.new as _hashlib_HASH_new name: str - string as data_obj: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new hash object using the named algorithm. @@ -1059,131 +1068,137 @@ The MD5 and SHA1 algorithms are always supported. [clinic start generated code]*/ static PyObject * -_hashlib_HASH_new_impl(PyObject *module, const char *name, - PyObject *data_obj, int usedforsecurity) -/*[clinic end generated code: output=30c6e7b9a5a4dce3 input=28848db5ccd0a9b5]*/ +_hashlib_HASH_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=b905aaf9840c1bbd input=c34af6c6e696d44e]*/ { - return _hashlib_HASH(module, name, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, name, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_md5 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a md5 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=87b0186440a44f8c input=990e36d5e689b16e]*/ +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=ca8cf184d90f7432 input=e7c0adbd6a867db1]*/ { - return _hashlib_HASH(module, Py_hash_md5, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_md5, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha1 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha1 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=6813024cf690670d input=948f2f4b6deabc10]*/ +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=1736fb7b310d64be input=f7e5bb1711e952d8]*/ { - return _hashlib_HASH(module, Py_hash_sha1, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha1, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=a2dfe7cc4eb14ebb input=f9272821fadca505]*/ +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=0d6ff57be5e5c140 input=3820fff7ed3a53b8]*/ { - return _hashlib_HASH(module, Py_hash_sha224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=1f874a34870f0a68 input=549fad9d2930d4c5]*/ +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=412ea7111555b6e7 input=9a2f115cf1f7e0eb]*/ { - return _hashlib_HASH(module, Py_hash_sha256, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=58529eff9ca457b2 input=48601a6e3bf14ad7]*/ +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=2e0dc395b59ed726 input=1ea48f6f01e77cfb]*/ { - return _hashlib_HASH(module, Py_hash_sha384, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2c744c9e4a40d5f6 input=c5c46a2a817aa98f]*/ +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4bdd760388dbfc0f input=3cf56903e07d1f5c]*/ { - return _hashlib_HASH(module, Py_hash_sha512, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha512, data, string, usedforsecurity); } @@ -1192,77 +1207,81 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_sha3_224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=144641c1d144b974 input=e3a01b2888916157]*/ +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=6d8dc2a924f3ba35 input=7f14f16a9f6a3158]*/ { - return _hashlib_HASH(module, Py_hash_sha3_224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=c61f1ab772d06668 input=e2908126c1b6deed]*/ +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=9e520f537b3a4622 input=7987150939d5e352]*/ { - return _hashlib_HASH(module, Py_hash_sha3_256, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=f68e4846858cf0ee input=ec0edf5c792f8252]*/ +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=d239ba0463fd6138 input=fc943401f67e3b81]*/ { - return _hashlib_HASH(module, Py_hash_sha3_384, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2eede478c159354a input=64e2cc0c094d56f4]*/ +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=17662f21038c2278 input=6601ddd2c6c1516d]*/ { - return _hashlib_HASH(module, Py_hash_sha3_512, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_512, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHA3 */ @@ -1270,42 +1289,46 @@ _hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_shake_128 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-128 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=bc49cdd8ada1fa97 input=6c9d67440eb33ec8]*/ +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4e6afed8d18980ad input=373c3f1c93d87b37]*/ { - return _hashlib_HASH(module, Py_hash_shake_128, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_128, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_shake_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-256 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=358d213be8852df7 input=479cbe9fefd4a9f8]*/ +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=62481bce4a77d16c input=101c139ea2ddfcbf]*/ { - return _hashlib_HASH(module, Py_hash_shake_256, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_256, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHAKE */ +#undef CALL_HASHLIB_NEW + /*[clinic input] _hashlib.pbkdf2_hmac as pbkdf2_hmac diff --git a/Modules/blake2module.c b/Modules/blake2module.c index f9acc57f1b2fa3..07aa89f573f05f 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -655,8 +655,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /*[clinic input] @classmethod _blake2.blake2b.__new__ as py_blake2b_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = _blake2.blake2b.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -670,26 +669,31 @@ _blake2.blake2b.__new__ as py_blake2b_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2b hash object. [clinic start generated code]*/ static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=32bfd8f043c6896f input=8fee2b7b11428b2d]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=de64bd850606b6a0 input=78cf60a2922d2f90]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } /*[clinic input] @classmethod _blake2.blake2s.__new__ as py_blake2s_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = _blake2.blake2s.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -703,18 +707,24 @@ _blake2.blake2s.__new__ as py_blake2s_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2s hash object. [clinic start generated code]*/ static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=556181f73905c686 input=8165a11980eac7f3]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=582a0c4295cc3a3c input=6843d6332eefd295]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index b2f6b25a235e68..61ea10e2a48284 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -233,7 +233,7 @@ _hashlib_HASHXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nar #endif /* defined(PY_OPENSSL_HAS_SHAKE) */ PyDoc_STRVAR(_hashlib_HASH_new__doc__, -"new($module, /, name, string=b\'\', *, usedforsecurity=True)\n" +"new($module, /, name, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new hash object using the named algorithm.\n" @@ -247,8 +247,8 @@ PyDoc_STRVAR(_hashlib_HASH_new__doc__, {"new", _PyCFunction_CAST(_hashlib_HASH_new), METH_FASTCALL|METH_KEYWORDS, _hashlib_HASH_new__doc__}, static PyObject * -_hashlib_HASH_new_impl(PyObject *module, const char *name, - PyObject *data_obj, int usedforsecurity); +_hashlib_HASH_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -256,7 +256,7 @@ _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -265,7 +265,7 @@ _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(name), &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(name), &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -274,18 +274,19 @@ _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"name", "string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"name", "data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "new", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; const char *name; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -309,7 +310,7 @@ _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO goto skip_optional_pos; } if (args[1]) { - data_obj = args[1]; + data = args[1]; if (!--noptargs) { goto skip_optional_pos; } @@ -318,19 +319,25 @@ _hashlib_HASH_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[2]); - if (usedforsecurity < 0) { - goto exit; + if (args[2]) { + usedforsecurity = PyObject_IsTrue(args[2]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[3]; skip_optional_kwonly: - return_value = _hashlib_HASH_new_impl(module, name, data_obj, usedforsecurity); + return_value = _hashlib_HASH_new_impl(module, name, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_md5__doc__, -"openssl_md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a md5 hash object; optionally initialized with a string"); @@ -339,8 +346,8 @@ PyDoc_STRVAR(_hashlib_openssl_md5__doc__, {"openssl_md5", _PyCFunction_CAST(_hashlib_openssl_md5), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_md5__doc__}, static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -348,7 +355,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -357,7 +364,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -366,17 +373,18 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -387,7 +395,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -396,19 +404,25 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_md5_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_md5_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, -"openssl_sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a sha1 hash object; optionally initialized with a string"); @@ -417,8 +431,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, {"openssl_sha1", _PyCFunction_CAST(_hashlib_openssl_sha1), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha1__doc__}, static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -426,7 +440,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -435,7 +449,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -444,17 +458,18 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -465,7 +480,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -474,19 +489,26 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha1_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha1_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, -"openssl_sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha224 hash object; optionally initialized with a string"); @@ -495,8 +517,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, {"openssl_sha224", _PyCFunction_CAST(_hashlib_openssl_sha224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha224__doc__}, static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -504,7 +526,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -513,7 +535,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -522,17 +544,18 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -543,7 +566,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -552,19 +575,26 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha224_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, -"openssl_sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha256 hash object; optionally initialized with a string"); @@ -573,8 +603,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, {"openssl_sha256", _PyCFunction_CAST(_hashlib_openssl_sha256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha256__doc__}, static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -582,7 +612,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -591,7 +621,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -600,17 +630,18 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -621,7 +652,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -630,19 +661,26 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha256_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, -"openssl_sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha384 hash object; optionally initialized with a string"); @@ -651,8 +689,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, {"openssl_sha384", _PyCFunction_CAST(_hashlib_openssl_sha384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha384__doc__}, static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -660,7 +698,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -669,7 +707,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -678,17 +716,18 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -699,7 +738,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -708,19 +747,26 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha384_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, -"openssl_sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha512 hash object; optionally initialized with a string"); @@ -729,8 +775,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, {"openssl_sha512", _PyCFunction_CAST(_hashlib_openssl_sha512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha512__doc__}, static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -738,7 +784,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -747,7 +793,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -756,17 +802,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -777,7 +824,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -786,12 +833,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -800,7 +853,8 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, -"openssl_sha3_224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-224 hash object; optionally initialized with a string"); @@ -809,8 +863,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, {"openssl_sha3_224", _PyCFunction_CAST(_hashlib_openssl_sha3_224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_224__doc__}, static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -818,7 +872,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -827,7 +881,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -836,17 +890,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -857,7 +912,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -866,12 +921,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_224_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -882,7 +943,8 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, -"openssl_sha3_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-256 hash object; optionally initialized with a string"); @@ -891,8 +953,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, {"openssl_sha3_256", _PyCFunction_CAST(_hashlib_openssl_sha3_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_256__doc__}, static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -900,7 +962,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -909,7 +971,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -918,17 +980,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -939,7 +1002,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -948,12 +1011,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -964,7 +1033,8 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, -"openssl_sha3_384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-384 hash object; optionally initialized with a string"); @@ -973,8 +1043,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, {"openssl_sha3_384", _PyCFunction_CAST(_hashlib_openssl_sha3_384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_384__doc__}, static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -982,7 +1052,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -991,7 +1061,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1000,17 +1070,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1021,7 +1092,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1030,12 +1101,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_384_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1046,7 +1123,8 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, -"openssl_sha3_512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-512 hash object; optionally initialized with a string"); @@ -1055,8 +1133,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, {"openssl_sha3_512", _PyCFunction_CAST(_hashlib_openssl_sha3_512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_512__doc__}, static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1064,7 +1142,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1073,7 +1151,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1082,17 +1160,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1103,7 +1182,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1112,12 +1191,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1128,7 +1213,8 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, -"openssl_shake_128($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_128($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-128 variable hash object; optionally initialized with a string"); @@ -1137,8 +1223,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, {"openssl_shake_128", _PyCFunction_CAST(_hashlib_openssl_shake_128), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_128__doc__}, static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1146,7 +1232,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1155,7 +1241,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1164,17 +1250,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_128", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1185,7 +1272,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1194,12 +1281,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_128_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_128_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1210,7 +1303,8 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, -"openssl_shake_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-256 variable hash object; optionally initialized with a string"); @@ -1219,8 +1313,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, {"openssl_shake_256", _PyCFunction_CAST(_hashlib_openssl_shake_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_256__doc__}, static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1228,7 +1322,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1237,7 +1331,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1246,17 +1340,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1267,7 +1362,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1276,12 +1371,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1883,4 +1984,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=dc03b64435166a64 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=29f4aaf01714778e input=a9049054013a1b77]*/ diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index bb2e308574a50a..9e9cd56e569b24 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -10,20 +10,21 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_blake2b_new__doc__, -"blake2b(data=b\'\', /, *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" +"blake2b(data=b\'\', *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2b hash object."); static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -31,7 +32,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -40,7 +41,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -49,18 +50,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2b", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -73,18 +74,23 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -182,12 +188,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2b_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -207,20 +219,21 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } PyDoc_STRVAR(py_blake2s_new__doc__, -"blake2s(data=b\'\', /, *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" +"blake2s(data=b\'\', *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2s hash object."); static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -228,7 +241,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -237,7 +250,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -246,18 +259,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2s", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -270,18 +283,23 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -379,12 +397,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2s_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -478,4 +502,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=d30e8293bd8e2950 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/md5module.c.h b/Modules/clinic/md5module.c.h index 9ca4f6528ce8f5..f76902586dddb2 100644 --- a/Modules/clinic/md5module.c.h +++ b/Modules/clinic/md5module.c.h @@ -89,7 +89,7 @@ MD5Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_md5_md5__doc__, -"md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new MD5 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_md5_md5__doc__, {"md5", _PyCFunction_CAST(_md5_md5), METH_FASTCALL|METH_KEYWORDS, _md5_md5__doc__}, static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity); +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _md5_md5_impl(module, string, usedforsecurity); + return_value = _md5_md5_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=73f4d2034d9fcc63 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=920fe54b9ed06f92 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha1module.c.h b/Modules/clinic/sha1module.c.h index 3e5fd1a41ce21f..4a58d0cd9b82a4 100644 --- a/Modules/clinic/sha1module.c.h +++ b/Modules/clinic/sha1module.c.h @@ -89,7 +89,7 @@ SHA1Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha1_sha1__doc__, -"sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA1 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_sha1_sha1__doc__, {"sha1", _PyCFunction_CAST(_sha1_sha1), METH_FASTCALL|METH_KEYWORDS, _sha1_sha1__doc__}, static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha1_sha1_impl(module, string, usedforsecurity); + return_value = _sha1_sha1_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=06161e87e2d645d4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fd5a917404b68c4f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha2module.c.h b/Modules/clinic/sha2module.c.h index 26612125e75df9..07be91e4f6c68f 100644 --- a/Modules/clinic/sha2module.c.h +++ b/Modules/clinic/sha2module.c.h @@ -169,7 +169,7 @@ SHA512Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha2_sha256__doc__, -"sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha256($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-256 hash object; optionally initialized with a string."); @@ -178,7 +178,8 @@ PyDoc_STRVAR(_sha2_sha256__doc__, {"sha256", _PyCFunction_CAST(_sha2_sha256), METH_FASTCALL|METH_KEYWORDS, _sha2_sha256__doc__}, static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -186,7 +187,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -195,7 +196,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -204,17 +205,18 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -225,7 +227,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -234,19 +236,25 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha256_impl(module, string, usedforsecurity); + return_value = _sha2_sha256_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha224__doc__, -"sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha224($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-224 hash object; optionally initialized with a string."); @@ -255,7 +263,8 @@ PyDoc_STRVAR(_sha2_sha224__doc__, {"sha224", _PyCFunction_CAST(_sha2_sha224), METH_FASTCALL|METH_KEYWORDS, _sha2_sha224__doc__}, static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -263,7 +272,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -272,7 +281,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -281,17 +290,18 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -302,7 +312,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -311,19 +321,25 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha224_impl(module, string, usedforsecurity); + return_value = _sha2_sha224_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha512__doc__, -"sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha512($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-512 hash object; optionally initialized with a string."); @@ -332,7 +348,8 @@ PyDoc_STRVAR(_sha2_sha512__doc__, {"sha512", _PyCFunction_CAST(_sha2_sha512), METH_FASTCALL|METH_KEYWORDS, _sha2_sha512__doc__}, static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -340,7 +357,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -349,7 +366,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -358,17 +375,18 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -379,7 +397,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -388,19 +406,25 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha512_impl(module, string, usedforsecurity); + return_value = _sha2_sha512_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha384__doc__, -"sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha384($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-384 hash object; optionally initialized with a string."); @@ -409,7 +433,8 @@ PyDoc_STRVAR(_sha2_sha384__doc__, {"sha384", _PyCFunction_CAST(_sha2_sha384), METH_FASTCALL|METH_KEYWORDS, _sha2_sha384__doc__}, static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -417,7 +442,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -426,7 +451,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -435,17 +460,18 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -456,7 +482,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -465,14 +491,20 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha384_impl(module, string, usedforsecurity); + return_value = _sha2_sha384_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=af11090855b7c85a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90625b237c774a9f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index 25f72b74f801db..121be2c0758695 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -10,13 +10,14 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_sha3_new__doc__, -"sha3_224(data=b\'\', /, *, usedforsecurity=True)\n" +"sha3_224(data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA3 hash object."); static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity); +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string); static PyObject * py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -24,7 +25,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -33,7 +34,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -42,40 +43,51 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(fastargs[1]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[1]) { + usedforsecurity = PyObject_IsTrue(fastargs[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[2]; skip_optional_kwonly: - return_value = py_sha3_new_impl(type, data, usedforsecurity); + return_value = py_sha3_new_impl(type, data_obj, usedforsecurity, string); exit: return return_value; @@ -158,24 +170,57 @@ _sha3_sha3_224_update(PyObject *self, PyObject *data) } PyDoc_STRVAR(_sha3_shake_128_digest__doc__, -"digest($self, length, /)\n" +"digest($self, /, length)\n" "--\n" "\n" "Return the digest value as a bytes object."); #define _SHA3_SHAKE_128_DIGEST_METHODDEF \ - {"digest", (PyCFunction)_sha3_shake_128_digest, METH_O, _sha3_shake_128_digest__doc__}, + {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_digest(PyObject *self, PyObject *arg) +_sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "digest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length); @@ -185,24 +230,57 @@ _sha3_shake_128_digest(PyObject *self, PyObject *arg) } PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, -"hexdigest($self, length, /)\n" +"hexdigest($self, /, length)\n" "--\n" "\n" "Return the digest value as a string of hexadecimal digits."); #define _SHA3_SHAKE_128_HEXDIGEST_METHODDEF \ - {"hexdigest", (PyCFunction)_sha3_shake_128_hexdigest, METH_O, _sha3_shake_128_hexdigest__doc__}, + {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) +_sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "hexdigest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length); @@ -210,4 +288,4 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=5b3ac1c06c6899ea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/ diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 7105e68af7b806..a80b195a765792 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -76,3 +76,32 @@ * to allow the user to optimize based on the platform they're using. */ #define HASHLIB_GIL_MINSIZE 2048 +static inline int +_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) +{ + if (data != NULL && string == NULL) { + // called as H(data) or H(data=...) + *res = data; + return 1; + } + else if (data == NULL && string != NULL) { + // called as H(string=...) + *res = string; + return 1; + } + else if (data == NULL && string == NULL) { + // fast path when no data is given + assert(!PyErr_Occurred()); + *res = NULL; + return 0; + } + else { + // called as H(data=..., string) + *res = NULL; + PyErr_SetString(PyExc_TypeError, + "'data' and 'string' are mutually exclusive " + "and support for 'string' keyword parameter " + "is slated for removal in a future version."); + return -1; + } +} diff --git a/Modules/md5module.c b/Modules/md5module.c index c36eb41d4d201e..9b5ea2d6e02605 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -276,17 +276,24 @@ static PyType_Spec md5_type_spec = { /*[clinic input] _md5.md5 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new MD5 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/ +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=d45e187d3d16f3a8 input=7ea5c5366dbb44bf]*/ { + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + MD5object *new; Py_buffer buf; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index f4a00cdb422156..a746bf74f8d4c1 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -272,19 +272,25 @@ static PyType_Spec sha1_type_spec = { /*[clinic input] _sha1.sha1 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA1 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=6f8b3af05126e18e input=bd54b68e2bf36a8a]*/ +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=0d453775924f88a7 input=807f25264e0ac656]*/ { SHA1object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); diff --git a/Modules/sha2module.c b/Modules/sha2module.c index e88d7cb2d456bf..72931910c5d720 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -594,18 +594,24 @@ static PyType_Spec sha512_type_spec = { /*[clinic input] _sha2.sha256 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-256 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=243c9dd289931f87 input=6249da1de607280a]*/ +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=49828a7bcd418f45 input=9ce1d70e669abc14]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); @@ -651,18 +657,25 @@ _sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha224 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-224 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=68191f232e4a3843 input=c42bcba47fd7d2b7]*/ +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=2163cb03b6cf6157 input=612f7682a889bc2a]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } @@ -706,19 +719,25 @@ _sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha512 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-512 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=d55c8996eca214d7 input=0576ae2a6ebfad25]*/ +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=cc3fcfce001a4538 input=19c9f2c06d59563a]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); @@ -763,19 +782,25 @@ _sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha384 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-384 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=b29a0d81d51d1368 input=4e9199d8de0d2f9b]*/ +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=b6e3db593b5a0330 input=9fd50c942ad9e0bf]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); diff --git a/Modules/sha3module.c b/Modules/sha3module.c index a7edf5c66a1e76..cfbf0cbcc042c5 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -105,18 +105,25 @@ sha3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *buf, Py_ssize_t len) /*[clinic input] @classmethod _sha3.sha3_224.__new__ as py_sha3_new - data: object(c_default="NULL") = b'' - / + + data as data_obj: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new SHA3 hash object. [clinic start generated code]*/ static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) -/*[clinic end generated code: output=90409addc5d5e8b0 input=637e5f8f6a93982a]*/ +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=dcec1eca20395f2a input=c106e0b4e2d67d58]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } + Py_buffer buf = {NULL, NULL}; SHA3State *state = _PyType_GetModuleState(type); SHA3object *self = newSHA3object(type); @@ -503,14 +510,13 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) _sha3.shake_128.digest length: unsigned_long - / Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=2313605e2f87bb8f input=418ef6a36d2e6082]*/ +/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/ { return _SHAKE_digest((PyObject *)self, length, 0); } @@ -520,14 +526,13 @@ _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) _sha3.shake_128.hexdigest length: unsigned_long - / Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=bf8e2f1e490944a8 input=69fb29b0926ae321]*/ +/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/ { return _SHAKE_digest((PyObject *)self, length, 1); } From 68784fed78aa297f0de0d038742495709185bef5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 31 May 2025 11:23:01 +0300 Subject: [PATCH 429/989] gh-133489: Remove size restrictions on getrandbits() and randbytes() (GH-133658) random.getrandbits() can now generate more that 2**31 bits. random.randbytes() can now generate more that 256 MiB. --- Lib/test/test_random.py | 28 +++++++++++++++++++ ...-05-08-13-43-19.gh-issue-133489.9eGS1Z.rst | 2 ++ Modules/_randommodule.c | 20 ++++++------- Modules/clinic/_randommodule.c.h | 10 +++---- Objects/longobject.c | 13 ++------- 5 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-08-13-43-19.gh-issue-133489.9eGS1Z.rst diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index bd76d636e4f0fc..54910cd8054a1f 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -392,6 +392,8 @@ def test_getrandbits(self): self.assertRaises(TypeError, self.gen.getrandbits) self.assertRaises(TypeError, self.gen.getrandbits, 1, 2) self.assertRaises(ValueError, self.gen.getrandbits, -1) + self.assertRaises(OverflowError, self.gen.getrandbits, 1<<1000) + self.assertRaises(ValueError, self.gen.getrandbits, -1<<1000) self.assertRaises(TypeError, self.gen.getrandbits, 10.1) def test_pickling(self): @@ -435,6 +437,8 @@ def test_randbytes(self): self.assertRaises(TypeError, self.gen.randbytes) self.assertRaises(TypeError, self.gen.randbytes, 1, 2) self.assertRaises(ValueError, self.gen.randbytes, -1) + self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000) + self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000) self.assertRaises(TypeError, self.gen.randbytes, 1.0) def test_mu_sigma_default_args(self): @@ -806,6 +810,22 @@ def test_getrandbits(self): self.assertEqual(self.gen.getrandbits(100), 97904845777343510404718956115) + def test_getrandbits_2G_bits(self): + size = 2**31 + self.gen.seed(1234567) + x = self.gen.getrandbits(size) + self.assertEqual(x.bit_length(), size) + self.assertEqual(x & (2**100-1), 890186470919986886340158459475) + self.assertEqual(x >> (size-100), 1226514312032729439655761284440) + + @support.bigmemtest(size=2**32, memuse=1/8+2/15, dry_run=False) + def test_getrandbits_4G_bits(self, size): + self.gen.seed(1234568) + x = self.gen.getrandbits(size) + self.assertEqual(x.bit_length(), size) + self.assertEqual(x & (2**100-1), 287241425661104632871036099814) + self.assertEqual(x >> (size-100), 739728759900339699429794460738) + def test_randrange_uses_getrandbits(self): # Verify use of getrandbits by randrange # Use same seed as in the cross-platform repeatability test @@ -962,6 +982,14 @@ def test_randbytes_getrandbits(self): self.assertEqual(self.gen.randbytes(n), gen2.getrandbits(n * 8).to_bytes(n, 'little')) + @support.bigmemtest(size=2**29, memuse=1+16/15, dry_run=False) + def test_randbytes_256M(self, size): + self.gen.seed(2849427419) + x = self.gen.randbytes(size) + self.assertEqual(len(x), size) + self.assertEqual(x[:12].hex(), 'f6fd9ae63855ab91ea238b4f') + self.assertEqual(x[-12:].hex(), '0e7af69a84ee99bf4a11becc') + def test_sample_counts_equivalence(self): # Test the documented strong equivalence to a sample with repeated elements. # We run this test on random.Random() which makes deterministic selections diff --git a/Misc/NEWS.d/next/Library/2025-05-08-13-43-19.gh-issue-133489.9eGS1Z.rst b/Misc/NEWS.d/next/Library/2025-05-08-13-43-19.gh-issue-133489.9eGS1Z.rst new file mode 100644 index 00000000000000..0c07beb76938f0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-08-13-43-19.gh-issue-133489.9eGS1Z.rst @@ -0,0 +1,2 @@ +:func:`random.getrandbits` can now generate more that 2\ :sup:`31` bits. +:func:`random.randbytes` can now generate more that 256 MiB. diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index d5bac2f5b78120..2f4f388ce1161a 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -497,34 +497,32 @@ _random_Random_setstate_impl(RandomObject *self, PyObject *state) _random.Random.getrandbits self: self(type="RandomObject *") - k: int + k: uint64 / getrandbits(k) -> x. Generates an int with k random bits. [clinic start generated code]*/ static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k) -/*[clinic end generated code: output=b402f82a2158887f input=87603cd60f79f730]*/ +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k) +/*[clinic end generated code: output=c30ef8435f3433cf input=64226ac13bb4d2a3]*/ { - int i, words; + Py_ssize_t i, words; uint32_t r; uint32_t *wordarray; PyObject *result; - if (k < 0) { - PyErr_SetString(PyExc_ValueError, - "number of bits must be non-negative"); - return NULL; - } - if (k == 0) return PyLong_FromLong(0); if (k <= 32) /* Fast path */ return PyLong_FromUnsignedLong(genrand_uint32(self) >> (32 - k)); - words = (k - 1) / 32 + 1; + if ((k - 1u) / 32u + 1u > PY_SSIZE_T_MAX / 4u) { + PyErr_NoMemory(); + return NULL; + } + words = (k - 1u) / 32u + 1u; wordarray = (uint32_t *)PyMem_Malloc(words * 4); if (wordarray == NULL) { PyErr_NoMemory(); diff --git a/Modules/clinic/_randommodule.c.h b/Modules/clinic/_randommodule.c.h index 1e989e970c9de5..2563a16aea0b6f 100644 --- a/Modules/clinic/_randommodule.c.h +++ b/Modules/clinic/_randommodule.c.h @@ -3,6 +3,7 @@ preserve [clinic start generated code]*/ #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_long.h" // _PyLong_UInt64_Converter() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_random_Random_random__doc__, @@ -124,16 +125,15 @@ PyDoc_STRVAR(_random_Random_getrandbits__doc__, {"getrandbits", (PyCFunction)_random_Random_getrandbits, METH_O, _random_Random_getrandbits__doc__}, static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k); +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k); static PyObject * _random_Random_getrandbits(PyObject *self, PyObject *arg) { PyObject *return_value = NULL; - int k; + uint64_t k; - k = PyLong_AsInt(arg); - if (k == -1 && PyErr_Occurred()) { + if (!_PyLong_UInt64_Converter(arg, &k)) { goto exit; } Py_BEGIN_CRITICAL_SECTION(self); @@ -143,4 +143,4 @@ _random_Random_getrandbits(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=4458b5a69201ebea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7ce97b2194eecaf7 input=a9049054013a1b77]*/ diff --git a/Objects/longobject.c b/Objects/longobject.c index 0b2dfa003fac53..2b533312fee673 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -971,16 +971,9 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, ++numsignificantbytes; } - /* How many Python int digits do we need? We have - 8*numsignificantbytes bits, and each Python int digit has - PyLong_SHIFT bits, so it's the ceiling of the quotient. */ - /* catch overflow before it happens */ - if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { - PyErr_SetString(PyExc_OverflowError, - "byte array too long to convert to int"); - return NULL; - } - ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + /* avoid integer overflow */ + ndigits = numsignificantbytes / PyLong_SHIFT * 8 + + (numsignificantbytes % PyLong_SHIFT * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; v = long_alloc(ndigits); if (v == NULL) return NULL; From 3c66e5976669a599adfb260514c03815b1a9e4e9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 31 May 2025 12:03:08 +0300 Subject: [PATCH 430/989] gh-134918: Fix and improve doctest's documentation (GH-134919) --- Doc/library/doctest.rst | 95 ++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index b86fef9fd6f310..8236d703fc1e45 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -174,7 +174,7 @@ with assorted summaries at the end. You can force verbose mode by passing ``verbose=True`` to :func:`testmod`, or prohibit it by passing ``verbose=False``. In either of those cases, -``sys.argv`` is not examined by :func:`testmod` (so passing ``-v`` or not +:data:`sys.argv` is not examined by :func:`testmod` (so passing ``-v`` or not has no effect). There is also a command line shortcut for running :func:`testmod`, see section @@ -231,7 +231,7 @@ documentation:: As with :func:`testmod`, :func:`testfile` won't display anything unless an example fails. If an example does fail, then the failing example(s) and the cause(s) of the failure(s) are printed to stdout, using the same format as -:func:`testmod`. +:func:`!testmod`. By default, :func:`testfile` looks for files in the calling module's directory. See section :ref:`doctest-basic-api` for a description of the optional arguments @@ -311,6 +311,9 @@ Which Docstrings Are Examined? The module docstring, and all function, class and method docstrings are searched. Objects imported into the module are not searched. +.. attribute:: module.__test__ + :no-typesetting: + In addition, there are cases when you want tests to be part of a module but not part of the help text, which requires that the tests not be included in the docstring. Doctest looks for a module-level variable called ``__test__`` and uses it to locate other @@ -533,7 +536,7 @@ Some details you should read once, but won't need to remember: * The interactive shell omits the traceback header line for some :exc:`SyntaxError`\ s. But doctest uses the traceback header line to distinguish exceptions from non-exceptions. So in the rare case where you need - to test a :exc:`SyntaxError` that omits the traceback header, you will need to + to test a :exc:`!SyntaxError` that omits the traceback header, you will need to manually add the traceback header line to your test example. .. index:: single: ^ (caret); marker @@ -860,15 +863,15 @@ The :const:`ELLIPSIS` directive gives a nice approach for the last example: Floating-point numbers are also subject to small output variations across -platforms, because Python defers to the platform C library for float formatting, -and C libraries vary widely in quality here. :: +platforms, because Python defers to the platform C library for some +floating-point calculations, and C libraries vary widely in quality here. :: - >>> 1./7 # risky - 0.14285714285714285 - >>> print(1./7) # safer - 0.142857142857 - >>> print(round(1./7, 6)) # much safer - 0.142857 + >>> 1000**0.1 # risky + 1.9952623149688797 + >>> round(1000**0.1, 9) # safer + 1.995262315 + >>> print(f'{1000**0.1:.4f}') # much safer + 1.9953 Numbers of the form ``I/2.**J`` are safe across all platforms, and I often contrive doctest examples to produce numbers of that form:: @@ -938,13 +941,13 @@ and :ref:`doctest-simple-testfile`. Optional argument *verbose* prints lots of stuff if true, and prints only failures if false; by default, or if ``None``, it's true if and only if ``'-v'`` - is in ``sys.argv``. + is in :data:`sys.argv`. Optional argument *report* prints a summary at the end when true, else prints nothing at the end. In verbose mode, the summary is detailed, else the summary is very brief (in fact, empty if all tests passed). - Optional argument *optionflags* (default value 0) takes the + Optional argument *optionflags* (default value ``0``) takes the :ref:`bitwise OR ` of option flags. See section :ref:`doctest-options`. @@ -1045,7 +1048,7 @@ from text files and modules with doctests: The returned :class:`unittest.TestSuite` is to be run by the unittest framework and runs the interactive examples in each file. If an example in any file - fails, then the synthesized unit test fails, and a :exc:`failureException` + fails, then the synthesized unit test fails, and a :exc:`~unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a file are skipped, then the synthesized unit test is also marked as skipped. @@ -1078,13 +1081,14 @@ from text files and modules with doctests: Optional argument *setUp* specifies a set-up function for the test suite. This is called before running the tests in each file. The *setUp* function - will be passed a :class:`DocTest` object. The setUp function can access the - test globals as the *globs* attribute of the test passed. + will be passed a :class:`DocTest` object. The *setUp* function can access the + test globals as the :attr:`~DocTest.globs` attribute of the test passed. Optional argument *tearDown* specifies a tear-down function for the test suite. This is called after running the tests in each file. The *tearDown* - function will be passed a :class:`DocTest` object. The setUp function can - access the test globals as the *globs* attribute of the test passed. + function will be passed a :class:`DocTest` object. The *tearDown* function can + access the test globals as the :attr:`~DocTest.globs` attribute of the test + passed. Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each @@ -1111,11 +1115,12 @@ from text files and modules with doctests: Convert doctest tests for a module to a :class:`unittest.TestSuite`. The returned :class:`unittest.TestSuite` is to be run by the unittest framework - and runs each doctest in the module. If any of the doctests fail, then the - synthesized unit test fails, and a :exc:`failureException` exception is raised + and runs each doctest in the module. + Each docstring is run as a separate unit test. + If any of the doctests fail, then the synthesized unit test fails, + and a :exc:`unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a docstring are skipped, then the - synthesized unit test is also marked as skipped. Optional argument *module* provides the module to be tested. It can be a module object or a (possibly dotted) module name. If not specified, the module calling @@ -1123,7 +1128,7 @@ from text files and modules with doctests: Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each - test. By default, *globs* is a new empty dictionary. + test. By default, *globs* is the module's :attr:`~module.__dict__`. Optional argument *extraglobs* specifies an extra set of global variables, which is merged into *globs*. By default, no extra globals are used. @@ -1132,7 +1137,7 @@ from text files and modules with doctests: drop-in replacement) that is used to extract doctests from the module. Optional arguments *setUp*, *tearDown*, and *optionflags* are the same as for - function :func:`DocFileSuite` above. + function :func:`DocFileSuite` above, but they are called for each docstring. This function uses the same search technique as :func:`testmod`. @@ -1140,12 +1145,6 @@ from text files and modules with doctests: :func:`DocTestSuite` returns an empty :class:`unittest.TestSuite` if *module* contains no docstrings instead of raising :exc:`ValueError`. -.. exception:: failureException - - When doctests which have been converted to unit tests by :func:`DocFileSuite` - or :func:`DocTestSuite` fail, this exception is raised showing the name of - the file containing the test and a (sometimes approximate) line number. - Under the covers, :func:`DocTestSuite` creates a :class:`unittest.TestSuite` out of :class:`!doctest.DocTestCase` instances, and :class:`!DocTestCase` is a subclass of :class:`unittest.TestCase`. :class:`!DocTestCase` isn't documented @@ -1158,15 +1157,15 @@ of :class:`!DocTestCase`. So both ways of creating a :class:`unittest.TestSuite` run instances of :class:`!DocTestCase`. This is important for a subtle reason: when you run -:mod:`doctest` functions yourself, you can control the :mod:`doctest` options in -use directly, by passing option flags to :mod:`doctest` functions. However, if -you're writing a :mod:`unittest` framework, :mod:`unittest` ultimately controls +:mod:`doctest` functions yourself, you can control the :mod:`!doctest` options in +use directly, by passing option flags to :mod:`!doctest` functions. However, if +you're writing a :mod:`unittest` framework, :mod:`!unittest` ultimately controls when and how tests get run. The framework author typically wants to control -:mod:`doctest` reporting options (perhaps, e.g., specified by command line -options), but there's no way to pass options through :mod:`unittest` to -:mod:`doctest` test runners. +:mod:`!doctest` reporting options (perhaps, e.g., specified by command line +options), but there's no way to pass options through :mod:`!unittest` to +:mod:`!doctest` test runners. -For this reason, :mod:`doctest` also supports a notion of :mod:`doctest` +For this reason, :mod:`doctest` also supports a notion of :mod:`!doctest` reporting flags specific to :mod:`unittest` support, via this function: @@ -1181,12 +1180,12 @@ reporting flags specific to :mod:`unittest` support, via this function: :mod:`unittest`: the :meth:`!runTest` method of :class:`!DocTestCase` looks at the option flags specified for the test case when the :class:`!DocTestCase` instance was constructed. If no reporting flags were specified (which is the - typical and expected case), :mod:`!doctest`'s :mod:`unittest` reporting flags are + typical and expected case), :mod:`!doctest`'s :mod:`!unittest` reporting flags are :ref:`bitwise ORed ` into the option flags, and the option flags so augmented are passed to the :class:`DocTestRunner` instance created to run the doctest. If any reporting flags were specified when the :class:`!DocTestCase` instance was constructed, :mod:`!doctest`'s - :mod:`unittest` reporting flags are ignored. + :mod:`!unittest` reporting flags are ignored. The value of the :mod:`unittest` reporting flags in effect before the function was called is returned by the function. @@ -1279,7 +1278,7 @@ DocTest Objects .. attribute:: filename The name of the file that this :class:`DocTest` was extracted from; or - ``None`` if the filename is unknown, or if the :class:`DocTest` was not + ``None`` if the filename is unknown, or if the :class:`!DocTest` was not extracted from a file. @@ -1419,10 +1418,10 @@ DocTestFinder objects The globals for each :class:`DocTest` is formed by combining *globs* and *extraglobs* (bindings in *extraglobs* override bindings in *globs*). A new - shallow copy of the globals dictionary is created for each :class:`DocTest`. - If *globs* is not specified, then it defaults to the module's *__dict__*, if - specified, or ``{}`` otherwise. If *extraglobs* is not specified, then it - defaults to ``{}``. + shallow copy of the globals dictionary is created for each :class:`!DocTest`. + If *globs* is not specified, then it defaults to the module's + :attr:`~module.__dict__`, if specified, or ``{}`` otherwise. + If *extraglobs* is not specified, then it defaults to ``{}``. .. _doctest-doctestparser: @@ -1446,7 +1445,7 @@ DocTestParser objects :class:`DocTest` object. *globs*, *name*, *filename*, and *lineno* are attributes for the new - :class:`DocTest` object. See the documentation for :class:`DocTest` for more + :class:`!DocTest` object. See the documentation for :class:`DocTest` for more information. @@ -1461,7 +1460,7 @@ DocTestParser objects Divide the given string into examples and intervening text, and return them as a list of alternating :class:`Example`\ s and strings. Line numbers for the - :class:`Example`\ s are 0-based. The optional argument *name* is a name + :class:`!Example`\ s are 0-based. The optional argument *name* is a name identifying this string, and is only used for error messages. @@ -1501,7 +1500,7 @@ DocTestRunner objects :class:`OutputChecker`. This comparison may be customized with a number of option flags; see section :ref:`doctest-options` for more information. If the option flags are insufficient, then the comparison may also be customized by - passing a subclass of :class:`OutputChecker` to the constructor. + passing a subclass of :class:`!OutputChecker` to the constructor. The test runner's display output can be controlled in two ways. First, an output function can be passed to :meth:`run`; this function will be called @@ -1540,7 +1539,7 @@ DocTestRunner objects output; it should not be called directly. *example* is the example about to be processed. *test* is the test - *containing example*. *out* is the output function that was passed to + containing *example*. *out* is the output function that was passed to :meth:`DocTestRunner.run`. @@ -1940,7 +1939,7 @@ several options for organizing tests: containing test cases for the named topics. These functions can be included in the same file as the module, or separated out into a separate test file. -* Define a ``__test__`` dictionary mapping from regression test topics to +* Define a :attr:`~module.__test__` dictionary mapping from regression test topics to docstrings containing test cases. When you have placed your tests in a module, the module can itself be the test From ad39f017881e0bd8ffd809755ebf76380b928ad3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 31 May 2025 13:01:46 +0300 Subject: [PATCH 431/989] gh-108885: Use subtests for doctest examples run by unittest (GH-134890) Run each example as a subtest in unit tests synthesized by doctest.DocFileSuite() and doctest.DocTestSuite(). Add the doctest.DocTestRunner.report_skip() method. --- Doc/library/doctest.rst | 52 ++- Lib/doctest.py | 95 +++-- Lib/test/test_doctest/test_doctest.py | 338 +++++++++--------- Lib/test/test_regrtest.py | 2 +- ...-05-29-17-39-13.gh-issue-108885.MegCRA.rst | 3 + 5 files changed, 279 insertions(+), 211 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-29-17-39-13.gh-issue-108885.MegCRA.rst diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 8236d703fc1e45..fb43cf918b84dd 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -1046,12 +1046,15 @@ from text files and modules with doctests: Convert doctest tests from one or more text files to a :class:`unittest.TestSuite`. - The returned :class:`unittest.TestSuite` is to be run by the unittest framework - and runs the interactive examples in each file. If an example in any file - fails, then the synthesized unit test fails, and a :exc:`~unittest.TestCase.failureException` - exception is raised showing the name of the file containing the test and a - (sometimes approximate) line number. If all the examples in a file are - skipped, then the synthesized unit test is also marked as skipped. + The returned :class:`unittest.TestSuite` is to be run by the unittest + framework and runs the interactive examples in each file. + Each file is run as a separate unit test, and each example in a file + is run as a :ref:`subtest `. + If any example in a file fails, then the synthesized unit test fails. + The traceback for failure or error contains the name of the file + containing the test and a (sometimes approximate) line number. + If all the examples in a file are skipped, then the synthesized unit + test is also marked as skipped. Pass one or more paths (as strings) to text files to be examined. @@ -1109,18 +1112,23 @@ from text files and modules with doctests: The global ``__file__`` is added to the globals provided to doctests loaded from a text file using :func:`DocFileSuite`. + .. versionchanged:: next + Run each example as a :ref:`subtest `. + .. function:: DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None, setUp=None, tearDown=None, optionflags=0, checker=None) Convert doctest tests for a module to a :class:`unittest.TestSuite`. - The returned :class:`unittest.TestSuite` is to be run by the unittest framework - and runs each doctest in the module. - Each docstring is run as a separate unit test. - If any of the doctests fail, then the synthesized unit test fails, - and a :exc:`unittest.TestCase.failureException` exception is raised - showing the name of the file containing the test and a (sometimes approximate) - line number. If all the examples in a docstring are skipped, then the + The returned :class:`unittest.TestSuite` is to be run by the unittest + framework and runs each doctest in the module. + Each docstring is run as a separate unit test, and each example in + a docstring is run as a :ref:`subtest `. + If any of the doctests fail, then the synthesized unit test fails. + The traceback for failure or error contains the name of the file + containing the test and a (sometimes approximate) line number. + If all the examples in a docstring are skipped, then the + synthesized unit test is also marked as skipped. Optional argument *module* provides the module to be tested. It can be a module object or a (possibly dotted) module name. If not specified, the module calling @@ -1145,6 +1153,9 @@ from text files and modules with doctests: :func:`DocTestSuite` returns an empty :class:`unittest.TestSuite` if *module* contains no docstrings instead of raising :exc:`ValueError`. + .. versionchanged:: next + Run each example as a :ref:`subtest `. + Under the covers, :func:`DocTestSuite` creates a :class:`unittest.TestSuite` out of :class:`!doctest.DocTestCase` instances, and :class:`!DocTestCase` is a subclass of :class:`unittest.TestCase`. :class:`!DocTestCase` isn't documented @@ -1507,7 +1518,7 @@ DocTestRunner objects with strings that should be displayed. It defaults to ``sys.stdout.write``. If capturing the output is not sufficient, then the display output can be also customized by subclassing DocTestRunner, and overriding the methods - :meth:`report_start`, :meth:`report_success`, + :meth:`report_skip`, :meth:`report_start`, :meth:`report_success`, :meth:`report_unexpected_exception`, and :meth:`report_failure`. The optional keyword argument *checker* specifies the :class:`OutputChecker` @@ -1532,6 +1543,19 @@ DocTestRunner objects :class:`DocTestRunner` defines the following methods: + .. method:: report_skip(out, test, example) + + Report that the given example was skipped. This method is provided to + allow subclasses of :class:`DocTestRunner` to customize their output; it + should not be called directly. + + *example* is the example about to be processed. *test* is the test + containing *example*. *out* is the output function that was passed to + :meth:`DocTestRunner.run`. + + .. versionadded:: next + + .. method:: report_start(out, test, example) Report that the test runner is about to process the given example. This method diff --git a/Lib/doctest.py b/Lib/doctest.py index dec10a345165da..c8c95ecbb273b2 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -101,6 +101,7 @@ def _test(): import re import sys import traceback +import types import unittest from io import StringIO, IncrementalNewlineDecoder from collections import namedtuple @@ -108,8 +109,6 @@ def _test(): from _colorize import ANSIColors, can_colorize -__unittest = True - class TestResults(namedtuple('TestResults', 'failed attempted')): def __new__(cls, failed, attempted, *, skipped=0): results = super().__new__(cls, failed, attempted) @@ -387,7 +386,7 @@ def __init__(self, out): self.__out = out self.__debugger_used = False # do not play signal games in the pdb - pdb.Pdb.__init__(self, stdout=out, nosigint=True) + super().__init__(stdout=out, nosigint=True) # still use input() to get user input self.use_rawinput = 1 @@ -1280,6 +1279,11 @@ def __init__(self, checker=None, verbose=None, optionflags=0): # Reporting methods #///////////////////////////////////////////////////////////////// + def report_skip(self, out, test, example): + """ + Report that the given example was skipped. + """ + def report_start(self, out, test, example): """ Report that the test runner is about to process the given @@ -1377,6 +1381,8 @@ def __run(self, test, compileflags, out): # If 'SKIP' is set, then skip this example. if self.optionflags & SKIP: + if not quiet: + self.report_skip(out, test, example) skips += 1 continue @@ -2274,12 +2280,63 @@ def set_unittest_reportflags(flags): return old +class _DocTestCaseRunner(DocTestRunner): + + def __init__(self, *args, test_case, test_result, **kwargs): + super().__init__(*args, **kwargs) + self._test_case = test_case + self._test_result = test_result + self._examplenum = 0 + + def _subTest(self): + subtest = unittest.case._SubTest(self._test_case, str(self._examplenum), {}) + self._examplenum += 1 + return subtest + + def report_skip(self, out, test, example): + unittest.case._addSkip(self._test_result, self._subTest(), '') + + def report_success(self, out, test, example, got): + self._test_result.addSubTest(self._test_case, self._subTest(), None) + + def report_unexpected_exception(self, out, test, example, exc_info): + tb = self._add_traceback(exc_info[2], test, example) + exc_info = (*exc_info[:2], tb) + self._test_result.addSubTest(self._test_case, self._subTest(), exc_info) + + def report_failure(self, out, test, example, got): + msg = ('Failed example:\n' + _indent(example.source) + + self._checker.output_difference(example, got, self.optionflags).rstrip('\n')) + exc = self._test_case.failureException(msg) + tb = self._add_traceback(None, test, example) + exc_info = (type(exc), exc, tb) + self._test_result.addSubTest(self._test_case, self._subTest(), exc_info) + + def _add_traceback(self, traceback, test, example): + if test.lineno is None or example.lineno is None: + lineno = None + else: + lineno = test.lineno + example.lineno + 1 + return types.SimpleNamespace( + tb_frame = types.SimpleNamespace( + f_globals=test.globs, + f_code=types.SimpleNamespace( + co_filename=test.filename, + co_name=test.name, + ), + ), + tb_next = traceback, + tb_lasti = -1, + tb_lineno = lineno, + ) + + class DocTestCase(unittest.TestCase): def __init__(self, test, optionflags=0, setUp=None, tearDown=None, checker=None): - unittest.TestCase.__init__(self) + super().__init__() self._dt_optionflags = optionflags self._dt_checker = checker self._dt_test = test @@ -2303,30 +2360,28 @@ def tearDown(self): test.globs.clear() test.globs.update(self._dt_globs) + def run(self, result=None): + self._test_result = result + return super().run(result) + def runTest(self): test = self._dt_test - old = sys.stdout - new = StringIO() optionflags = self._dt_optionflags + result = self._test_result if not (optionflags & REPORTING_FLAGS): # The option flags don't include any reporting flags, # so add the default reporting flags optionflags |= _unittest_reportflags + if getattr(result, 'failfast', False): + optionflags |= FAIL_FAST - runner = DocTestRunner(optionflags=optionflags, - checker=self._dt_checker, verbose=False) - - try: - runner.DIVIDER = "-"*70 - results = runner.run(test, out=new.write, clear_globs=False) - if results.skipped == results.attempted: - raise unittest.SkipTest("all examples were skipped") - finally: - sys.stdout = old - - if results.failed: - raise self.failureException(self.format_failure(new.getvalue().rstrip('\n'))) + runner = _DocTestCaseRunner(optionflags=optionflags, + checker=self._dt_checker, verbose=False, + test_case=self, test_result=result) + results = runner.run(test, clear_globs=False) + if results.skipped == results.attempted: + raise unittest.SkipTest("all examples were skipped") def format_failure(self, err): test = self._dt_test @@ -2441,7 +2496,7 @@ def shortDescription(self): class SkipDocTestCase(DocTestCase): def __init__(self, module): self.module = module - DocTestCase.__init__(self, None) + super().__init__(None) def setUp(self): self.skipTest("DocTestSuite will not work with -O2 and above") diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index 2bfaa6c599cd47..72763d4a0132d0 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -2269,20 +2269,22 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite(test.test_doctest.sample_doctest) >>> result = suite.run(unittest.TestResult()) >>> result - + >>> for tst, _ in result.failures: ... print(tst) - bad (test.test_doctest.sample_doctest.__test__) - foo (test.test_doctest.sample_doctest) - test_silly_setup (test.test_doctest.sample_doctest) - y_is_one (test.test_doctest.sample_doctest) + bad (test.test_doctest.sample_doctest.__test__) [0] + foo (test.test_doctest.sample_doctest) [0] + >>> for tst, _ in result.errors: + ... print(tst) + test_silly_setup (test.test_doctest.sample_doctest) [1] + y_is_one (test.test_doctest.sample_doctest) [0] We can also supply the module by name: >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest') >>> result = suite.run(unittest.TestResult()) >>> result - + The module need not contain any doctest examples: @@ -2304,21 +2306,26 @@ def test_DocTestSuite(): >>> result >>> len(result.skipped) - 2 + 7 >>> for tst, _ in result.skipped: ... print(tst) + double_skip (test.test_doctest.sample_doctest_skip) [0] + double_skip (test.test_doctest.sample_doctest_skip) [1] double_skip (test.test_doctest.sample_doctest_skip) + partial_skip_fail (test.test_doctest.sample_doctest_skip) [0] + partial_skip_pass (test.test_doctest.sample_doctest_skip) [0] + single_skip (test.test_doctest.sample_doctest_skip) [0] single_skip (test.test_doctest.sample_doctest_skip) >>> for tst, _ in result.failures: ... print(tst) - no_skip_fail (test.test_doctest.sample_doctest_skip) - partial_skip_fail (test.test_doctest.sample_doctest_skip) + no_skip_fail (test.test_doctest.sample_doctest_skip) [0] + partial_skip_fail (test.test_doctest.sample_doctest_skip) [1] We can use the current module: >>> suite = test.test_doctest.sample_doctest.test_suite() >>> suite.run(unittest.TestResult()) - + We can also provide a DocTestFinder: @@ -2326,7 +2333,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', ... test_finder=finder) >>> suite.run(unittest.TestResult()) - + The DocTestFinder need not return any tests: @@ -2342,7 +2349,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', globs={}) >>> suite.run(unittest.TestResult()) - + Alternatively, we can provide extra globals. Here we'll make an error go away by providing an extra global variable: @@ -2350,7 +2357,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', ... extraglobs={'y': 1}) >>> suite.run(unittest.TestResult()) - + You can pass option flags. Here we'll cause an extra error by disabling the blank-line feature: @@ -2358,7 +2365,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', ... optionflags=doctest.DONT_ACCEPT_BLANKLINE) >>> suite.run(unittest.TestResult()) - + You can supply setUp and tearDown functions: @@ -2375,7 +2382,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', ... setUp=setUp, tearDown=tearDown) >>> suite.run(unittest.TestResult()) - + But the tearDown restores sanity: @@ -2393,7 +2400,7 @@ def test_DocTestSuite(): >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', setUp=setUp) >>> suite.run(unittest.TestResult()) - + Here, we didn't need to use a tearDown function because we modified the test globals, which are a copy of the @@ -2409,115 +2416,97 @@ def test_DocTestSuite_errors(): >>> suite = doctest.DocTestSuite(mod) >>> result = suite.run(unittest.TestResult()) >>> result - + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors - File "...sample_doctest_errors.py", line 0, in sample_doctest_errors - - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors - Failed example: + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors + >...>> 2 + 2 + AssertionError: Failed example: 2 + 2 Expected: 5 Got: 4 - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors - Failed example: - 1/0 - Exception raised: - Traceback (most recent call last): - File "", line 1, in - 1/0 - ~^~ - ZeroDivisionError: division by zero >>> print(result.failures[1][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.__test__.bad - File "...sample_doctest_errors.py", line unknown line number, in bad - - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad - Failed example: + Traceback (most recent call last): + File "...sample_doctest_errors.py", line None, in test.test_doctest.sample_doctest_errors.__test__.bad + AssertionError: Failed example: 2 + 2 Expected: 5 Got: 4 - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad - Failed example: - 1/0 - Exception raised: - Traceback (most recent call last): - File "", line 1, in - 1/0 - ~^~ - ZeroDivisionError: division by zero >>> print(result.failures[2][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.errors - File "...sample_doctest_errors.py", line 14, in errors - - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors - Failed example: + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors + >...>> 2 + 2 + AssertionError: Failed example: 2 + 2 Expected: 5 Got: 4 - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors - Failed example: + + >>> print(result.errors[0][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors + >...>> 1/0 + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.errors[1][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line None, in test.test_doctest.sample_doctest_errors.__test__.bad + File "", line 1, in 1/0 - Exception raised: - Traceback (most recent call last): - File "", line 1, in - 1/0 - ~^~ - ZeroDivisionError: division by zero - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors - Failed example: + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.errors[2][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors + >...>> 1/0 + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.errors[3][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors + >...>> f() + File "", line 1, in f() - Exception raised: - Traceback (most recent call last): - File "", line 1, in - f() - ~^^ - File "", line 2, in f - 2 + '2' - ~~^~~~~ - TypeError: ... - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors - Failed example: - g() - Exception raised: - Traceback (most recent call last): - File "", line 1, in - g() - ~^^ - File "...sample_doctest_errors.py", line 12, in g - [][0] # line 12 - ~~^^^ - IndexError: list index out of range + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... - >>> print(result.failures[3][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.syntax_error - File "...sample_doctest_errors.py", line 29, in syntax_error + >>> print(result.errors[4][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors + >...>> g() + File "", line 1, in + g() + ~^^ + File "...sample_doctest_errors.py", line 12, in g + [][0] # line 12 + ~~^^^ + IndexError: list index out of range - ---------------------------------------------------------------------- - File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error - Failed example: + >>> print(result.errors[5][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error + >...>> 2+*3 + File "", line 1 2+*3 - Exception raised: - File "", line 1 - 2+*3 - ^ - SyntaxError: invalid syntax + ^ + SyntaxError: invalid syntax """ @@ -2532,7 +2521,7 @@ def test_DocFileSuite(): ... 'test_doctest2.txt', ... 'test_doctest4.txt') >>> suite.run(unittest.TestResult()) - + The test files are looked for in the directory containing the calling module. A package keyword argument can be provided to @@ -2544,14 +2533,14 @@ def test_DocFileSuite(): ... 'test_doctest4.txt', ... package='test.test_doctest') >>> suite.run(unittest.TestResult()) - + '/' should be used as a path separator. It will be converted to a native separator at run time: >>> suite = doctest.DocFileSuite('../test_doctest/test_doctest.txt') >>> suite.run(unittest.TestResult()) - + If DocFileSuite is used from an interactive session, then files are resolved relative to the directory of sys.argv[0]: @@ -2577,7 +2566,7 @@ def test_DocFileSuite(): >>> suite = doctest.DocFileSuite(test_file, module_relative=False) >>> suite.run(unittest.TestResult()) - + It is an error to specify `package` when `module_relative=False`: @@ -2595,12 +2584,15 @@ def test_DocFileSuite(): ... 'test_doctest_skip2.txt') >>> result = suite.run(unittest.TestResult()) >>> result - + >>> len(result.skipped) - 1 + 4 >>> for tst, _ in result.skipped: # doctest: +ELLIPSIS ... print('=', tst) + = ...test_doctest_skip.txt [0] + = ...test_doctest_skip.txt [1] = ...test_doctest_skip.txt + = ...test_doctest_skip2.txt [0] You can specify initial global variables: @@ -2609,7 +2601,7 @@ def test_DocFileSuite(): ... 'test_doctest4.txt', ... globs={'favorite_color': 'blue'}) >>> suite.run(unittest.TestResult()) - + In this case, we supplied a missing favorite color. You can provide doctest options: @@ -2620,7 +2612,7 @@ def test_DocFileSuite(): ... optionflags=doctest.DONT_ACCEPT_BLANKLINE, ... globs={'favorite_color': 'blue'}) >>> suite.run(unittest.TestResult()) - + And, you can provide setUp and tearDown functions: @@ -2639,7 +2631,7 @@ def test_DocFileSuite(): ... 'test_doctest4.txt', ... setUp=setUp, tearDown=tearDown) >>> suite.run(unittest.TestResult()) - + But the tearDown restores sanity: @@ -2681,7 +2673,7 @@ def test_DocFileSuite(): ... 'test_doctest4.txt', ... encoding='utf-8') >>> suite.run(unittest.TestResult()) - + """ def test_DocFileSuite_errors(): @@ -2691,52 +2683,49 @@ def test_DocFileSuite_errors(): >>> suite = doctest.DocFileSuite('test_doctest_errors.txt') >>> result = suite.run(unittest.TestResult()) >>> result - + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test_doctest_errors.txt - File "...test_doctest_errors.txt", line 0 - - ---------------------------------------------------------------------- - File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt - Failed example: + Traceback (most recent call last): + File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt + >...>> 2 + 2 + AssertionError: Failed example: 2 + 2 Expected: 5 Got: 4 - ---------------------------------------------------------------------- - File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt - Failed example: + + >>> print(result.errors[0][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt + >...>> 1/0 + File "", line 1, in 1/0 - Exception raised: - Traceback (most recent call last): - File "", line 1, in - 1/0 - ~^~ - ZeroDivisionError: division by zero - ---------------------------------------------------------------------- - File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt - Failed example: + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.errors[1][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt + >...>> f() + File "", line 1, in f() - Exception raised: - Traceback (most recent call last): - File "", line 1, in - f() - ~^^ - File "", line 2, in f - 2 + '2' - ~~^~~~~ - TypeError: ... - ---------------------------------------------------------------------- - File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt - Failed example: + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + + >>> print(result.errors[2][1]) # doctest: +ELLIPSIS + Traceback (most recent call last): + File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt + >...>> 2+*3 + File "", line 1 2+*3 - Exception raised: - File "", line 1 - 2+*3 - ^ - SyntaxError: invalid syntax + ^ + SyntaxError: invalid syntax + """ def test_trailing_space_in_test(): @@ -2807,16 +2796,25 @@ def test_unittest_reportflags(): >>> import unittest >>> result = suite.run(unittest.TestResult()) >>> result - + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test_doctest.txt - ... - Failed example: - favorite_color - ... - Failed example: + Traceback (most recent call last): + File ... + >...>> if 1: + AssertionError: Failed example: if 1: - ... + print('a') + print() + print('b') + Expected: + a + + b + Got: + a + + b + Note that we see both failures displayed. @@ -2825,18 +2823,8 @@ def test_unittest_reportflags(): Now, when we run the test: - >>> result = suite.run(unittest.TestResult()) - >>> result - - >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test_doctest.txt - ... - Failed example: - favorite_color - Exception raised: - ... - NameError: name 'favorite_color' is not defined - + >>> suite.run(unittest.TestResult()) + We get only the first failure. @@ -2846,22 +2834,20 @@ def test_unittest_reportflags(): >>> suite = doctest.DocFileSuite('test_doctest.txt', ... optionflags=doctest.DONT_ACCEPT_BLANKLINE | doctest.REPORT_NDIFF) - Then the default eporting options are ignored: + Then the default reporting options are ignored: >>> result = suite.run(unittest.TestResult()) >>> result - + *NOTE*: These doctest are intentionally not placed in raw string to depict the trailing whitespace using `\x20` in the diff below. >>> print(result.failures[0][1]) # doctest: +ELLIPSIS - AssertionError: Failed doctest test for test_doctest.txt - ... - Failed example: - favorite_color - ... - Failed example: + Traceback ... + File ... + >...>> if 1: + AssertionError: Failed example: if 1: print('a') print() @@ -3669,9 +3655,9 @@ def test_run_doctestsuite_multiple_times(): >>> import test.test_doctest.sample_doctest >>> suite = doctest.DocTestSuite(test.test_doctest.sample_doctest) >>> suite.run(unittest.TestResult()) - + >>> suite.run(unittest.TestResult()) - + """ diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 8f4fc09442e083..f3ac301686b9fc 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -2067,7 +2067,7 @@ def load_tests(loader, tests, pattern): self.check_executed_tests(output, [testname], failed=[testname], parallel=True, - stats=TestStats(1, 1, 0)) + stats=TestStats(1, 2, 1)) def _check_random_seed(self, run_workers: bool): # gh-109276: When -r/--randomize is used, random.seed() is called diff --git a/Misc/NEWS.d/next/Library/2025-05-29-17-39-13.gh-issue-108885.MegCRA.rst b/Misc/NEWS.d/next/Library/2025-05-29-17-39-13.gh-issue-108885.MegCRA.rst new file mode 100644 index 00000000000000..e37cf121f5f529 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-29-17-39-13.gh-issue-108885.MegCRA.rst @@ -0,0 +1,3 @@ +Run each example as a subtest in unit tests synthesized by +:func:`doctest.DocFileSuite` and :func:`doctest.DocTestSuite`. +Add the :meth:`doctest.DocTestRunner.report_skip` method. From 379d0bc95646dfe923e7ea05fb7f1befbd85572d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 31 May 2025 12:48:34 +0200 Subject: [PATCH 432/989] gh-134696: fix `hashlib` tests for FIPS-only BLAKE-2 buildbot (#134968) --- Lib/test/test_hashlib.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 51b82fe3b516b5..8244f7c7553a37 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -249,6 +249,7 @@ def test_usedforsecurity_false(self): self._hashlib.new("md5", usedforsecurity=False) self._hashlib.openssl_md5(usedforsecurity=False) + @unittest.skipIf(get_fips_mode(), "skip in FIPS mode") def test_clinic_signature(self): for constructor in self.hash_constructors: with self.subTest(constructor.__name__): @@ -256,6 +257,17 @@ def test_clinic_signature(self): constructor(data=b'') constructor(string=b'') # should be deprecated in the future + digest_name = constructor(b'').name + with self.subTest(digest_name): + hashlib.new(digest_name, b'') + hashlib.new(digest_name, data=b'') + hashlib.new(digest_name, string=b'') + if self._hashlib: + self._hashlib.new(digest_name, b'') + self._hashlib.new(digest_name, data=b'') + self._hashlib.new(digest_name, string=b'') + + @unittest.skipIf(get_fips_mode(), "skip in FIPS mode") def test_clinic_signature_errors(self): nomsg = b'' mymsg = b'msg' @@ -295,9 +307,16 @@ def test_clinic_signature_errors(self): [(), dict(data=nomsg, string=nomsg), conflicting_call], ]: for constructor in self.hash_constructors: + digest_name = constructor(b'').name with self.subTest(constructor.__name__, args=args, kwds=kwds): with self.assertRaisesRegex(TypeError, errmsg): constructor(*args, **kwds) + with self.subTest(digest_name, args=args, kwds=kwds): + with self.assertRaisesRegex(TypeError, errmsg): + hashlib.new(digest_name, *args, **kwds) + if self._hashlib: + with self.assertRaisesRegex(TypeError, errmsg): + self._hashlib.new(digest_name, *args, **kwds) def test_unknown_hash(self): self.assertRaises(ValueError, hashlib.new, 'spam spam spam spam spam') From c81446af1dbf3c84bfd4ed604c245dd40463fd3a Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 31 May 2025 13:35:51 +0200 Subject: [PATCH 433/989] gh-133968: Create the Unicode writer on demand in json (#133832) --- Modules/_json.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 4aa6ae650651b3..57678ad595f928 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -360,13 +360,6 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } -static inline int -_PyUnicodeWriter_IsEmpty(PyUnicodeWriter *writer_pub) -{ - _PyUnicodeWriter *writer = (_PyUnicodeWriter*)writer_pub; - return (writer->pos == 0); -} - static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) { @@ -385,10 +378,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next const void *buf; int kind; - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - goto bail; - } + PyUnicodeWriter *writer = NULL; len = PyUnicode_GET_LENGTH(pystr); buf = PyUnicode_DATA(pystr); @@ -419,12 +409,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (c == '"') { // Fast path for simple case. - if (_PyUnicodeWriter_IsEmpty(writer)) { + if (writer == NULL) { PyObject *ret = PyUnicode_Substring(pystr, end, next); if (ret == NULL) { goto bail; } - PyUnicodeWriter_Discard(writer); *next_end_ptr = next + 1;; return ret; } @@ -432,6 +421,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next else if (c != '\\') { raise_errmsg("Unterminated string starting at", pystr, begin); goto bail; + } else if (writer == NULL) { + writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + goto bail; + } } /* Pick up this chunk if it's not zero length */ From af0d3268d9ae6090877c276c12ee6712b56578e7 Mon Sep 17 00:00:00 2001 From: CF Bolz-Tereick Date: Sat, 31 May 2025 13:38:05 +0200 Subject: [PATCH 434/989] Skip test as cpython_only that checks whether setattr interns the attribute or not (#134972) Skip test that checks whether setattr interns the attribute or not The details of when a string is being interned or not is implementation dependent. --- Lib/test/test_class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_class.py b/Lib/test/test_class.py index 4c12d43556fc2a..8c7a62a74ba90e 100644 --- a/Lib/test/test_class.py +++ b/Lib/test/test_class.py @@ -652,6 +652,7 @@ class B(A): a = A(hash(A.f)^(-1)) hash(a.f) + @cpython_only def testSetattrWrapperNameIntern(self): # Issue #25794: __setattr__ should intern the attribute name class A: From 895119ec24589cbf522e375aa71f27b9b7383a8b Mon Sep 17 00:00:00 2001 From: CF Bolz-Tereick Date: Sat, 31 May 2025 13:46:22 +0200 Subject: [PATCH 435/989] skip test for sys._stdlib_dir if that is not present (#134973) --- Lib/test/test_sys.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 65d15610ed1505..83745f3d0ba46e 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1299,6 +1299,7 @@ def test_module_names(self): for name in sys.stdlib_module_names: self.assertIsInstance(name, str) + @unittest.skipUnless(hasattr(sys, '_stdlib_dir'), 'need sys._stdlib_dir') def test_stdlib_dir(self): os = import_helper.import_fresh_module('os') marker = getattr(os, '__file__', None) From 5507eff19c757a908a2ff29dfe423e35595fda00 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 31 May 2025 14:56:33 +0300 Subject: [PATCH 436/989] Improve format of `InternalDocs/exception_handling.md` (#134969) --- InternalDocs/exception_handling.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 28589787e1fad7..9e38da4c862f16 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -8,7 +8,7 @@ The cost of raising an exception is increased, but not by much. The following code: -``` +```python try: g(0) except: @@ -18,7 +18,7 @@ except: compiles into intermediate code like the following: -``` +```python RESUME 0 1 SETUP_FINALLY 8 (to L1) @@ -118,13 +118,13 @@ All offsets and lengths are in code units, not bytes. We want the format to be compact, but quickly searchable. For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. -For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. +For it to be searchable quickly, we need to support binary search giving us `log(n)` performance in all cases. Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: `start, size, target, depth, push-lasti`. -Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. +Also, sizes are limited to `2**30` as the code length cannot exceed `2**31` and each code unit takes 2 bytes. It also happens that depth is generally quite small. So, we need to encode: @@ -140,7 +140,7 @@ lasti (1 bit) We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. -The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. +The 8 bits of a byte are (msb left) `SXdddddd` where `S` is the start bit. `X` is the extend bit meaning that the next byte is required to extend the offset. In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. From f58873e4b2b7aad8e3a08a6188c6eb08d0a3001b Mon Sep 17 00:00:00 2001 From: Itamar Oren Date: Sat, 31 May 2025 07:29:03 -0700 Subject: [PATCH 437/989] gh-134954: Hard-cap max file descriptors in subprocess test fd_status (#134955) * Hard-cap max file descriptors in subprocess test fd_status On some systems, `SC_OPEN_MAX` may return a very large value (i.e. 10**30), leading to the subprocess test timing out (or run forever). Prevent this situation by applying a hard cap on how many file descriptors are checked. * Fix typo in usage docstring s/fd_stats/fd_status/ --- Lib/test/subprocessdata/fd_status.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/subprocessdata/fd_status.py b/Lib/test/subprocessdata/fd_status.py index d12bd95abee61c..90e785981aeab0 100644 --- a/Lib/test/subprocessdata/fd_status.py +++ b/Lib/test/subprocessdata/fd_status.py @@ -2,7 +2,7 @@ file descriptors on stdout. Usage: -fd_stats.py: check all file descriptors +fd_status.py: check all file descriptors (up to 255) fd_status.py fd1 fd2 ...: check only specified file descriptors """ @@ -18,7 +18,7 @@ _MAXFD = os.sysconf("SC_OPEN_MAX") except: _MAXFD = 256 - test_fds = range(0, _MAXFD) + test_fds = range(0, min(_MAXFD, 256)) else: test_fds = map(int, sys.argv[1:]) for fd in test_fds: From 3704171415c1ea6ebbeb2f992758b6565f42e378 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 31 May 2025 18:48:51 +0100 Subject: [PATCH 438/989] gh-134835: Remove outdated list from `howto/urllib2.rst` (GH-134844) :teapot: --- Doc/howto/urllib2.rst | 86 ++++++++++--------------------------------- 1 file changed, 19 insertions(+), 67 deletions(-) diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index 33a2a7ea89ea07..d79d1abe8d0577 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -245,75 +245,27 @@ codes in the 100--299 range indicate success, you will usually only see error codes in the 400--599 range. :attr:`http.server.BaseHTTPRequestHandler.responses` is a useful dictionary of -response codes in that shows all the response codes used by :rfc:`2616`. The -dictionary is reproduced here for convenience :: +response codes that shows all the response codes used by :rfc:`2616`. +An excerpt from the dictionary is shown below :: - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this server.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + ... + : ('OK', 'Request fulfilled, document follows'), + ... + : ('Forbidden', + 'Request forbidden -- authorization will ' + 'not help'), + : ('Not Found', + 'Nothing matches the given URI'), + ... + : ("I'm a Teapot", + 'Server refuses to brew coffee because ' + 'it is a teapot'), + ... + : ('Service Unavailable', + 'The server cannot process the ' + 'request due to a high load'), + ... } When an error is raised the server responds by returning an HTTP error code From cebae977a63f32c3c03d14c040df3cea55b8f585 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sun, 1 Jun 2025 00:33:02 +0300 Subject: [PATCH 439/989] gh-133891: Add missing error check to `SET_COUNT` macro in `_testinternalcapi.c` (#133892) --- Modules/_testinternalcapi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 136e6a7a015049..845c218e679ad2 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1045,6 +1045,9 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) #define SET_COUNT(DICT, STRUCT, NAME) \ do { \ PyObject *count = PyLong_FromLong(STRUCT.NAME); \ + if (count == NULL) { \ + goto error; \ + } \ int res = PyDict_SetItemString(DICT, #NAME, count); \ Py_DECREF(count); \ if (res < 0) { \ From 965c48056633d3f4b41520c8cd07f0275f00fb4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 1 Jun 2025 07:56:56 +0200 Subject: [PATCH 440/989] gh-134970: Fix exception message in argparse module (GH-134971) Fix the "unknown action" exception in argparse.ArgumentParser.add_argument_group() to correctly replace the action class. --- Lib/argparse.py | 2 +- .../Library/2025-05-31-12-08-12.gh-issue-134970.lgSaxq.rst | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-31-12-08-12.gh-issue-134970.lgSaxq.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index d1a6350c3fda6d..83258cf3e0f37d 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1534,7 +1534,7 @@ def add_argument(self, *args, **kwargs): action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action {action_class!r}') + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) # raise an error if action for positional argument does not diff --git a/Misc/NEWS.d/next/Library/2025-05-31-12-08-12.gh-issue-134970.lgSaxq.rst b/Misc/NEWS.d/next/Library/2025-05-31-12-08-12.gh-issue-134970.lgSaxq.rst new file mode 100644 index 00000000000000..20f53569ef4566 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-31-12-08-12.gh-issue-134970.lgSaxq.rst @@ -0,0 +1,3 @@ +Fix the "unknown action" exception in +:meth:`argparse.ArgumentParser.add_argument_group` to correctly replace the +action class. From fe6f8a3619242b287a793a5b5d8645f402482c71 Mon Sep 17 00:00:00 2001 From: Rihaan Meher Date: Sun, 1 Jun 2025 04:18:31 -0400 Subject: [PATCH 441/989] gh-133503: clarify `compileall -s/-p` docs (#134756) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/compileall.rst | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index c42288419c4d2d..ebbbf857e717a4 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -56,11 +56,18 @@ compile Python sources. executed. .. option:: -s strip_prefix + + Remove the given prefix from paths recorded in the ``.pyc`` files. + Paths are made relative to the prefix. + + This option can be used with ``-p`` but not with ``-d``. + .. option:: -p prepend_prefix - Remove (``-s``) or append (``-p``) the given prefix of paths - recorded in the ``.pyc`` files. - Cannot be combined with ``-d``. + Prepend the given prefix to paths recorded in the ``.pyc`` files. + Use ``-p /`` to make the paths absolute. + + This option can be used with ``-s`` but not with ``-d``. .. option:: -x regex From b5952371668089299bc8472c1adb9f8a0e69b4a2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 1 Jun 2025 11:22:15 +0300 Subject: [PATCH 442/989] gh-132983: Minor fixes and clean up for the _zstd module (GH-134930) --- Lib/test/test_zstd.py | 92 ++++++++++++++++++++++++++++++------ Modules/_zstd/_zstdmodule.c | 87 +++++++++++++++++++++++----------- Modules/_zstd/_zstdmodule.h | 10 +++- Modules/_zstd/compressor.c | 69 +++++---------------------- Modules/_zstd/decompressor.c | 67 ++++---------------------- Modules/_zstd/zstddict.c | 1 - 6 files changed, 166 insertions(+), 160 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 014634e450e449..e475d9346b9594 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -1138,27 +1138,41 @@ def test_invalid_dict(self): ZstdDecompressor(zd) # wrong type - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): - ZstdCompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=[zd, 1]) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=(zd, 1.0)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=(zd,)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, -1)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 3)) - - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): - ZstdDecompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaises(OverflowError): + ZstdCompressor(zstd_dict=(zd, 2**1000)) + with self.assertRaises(OverflowError): + ZstdCompressor(zstd_dict=(zd, -2**1000)) + + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor(zstd_dict=[zd, 1]) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor(zstd_dict=(zd, 1.0)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor((zd,)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, -1)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 3)) + with self.assertRaises(OverflowError): + ZstdDecompressor((zd, 2**1000)) + with self.assertRaises(OverflowError): + ZstdDecompressor((zd, -2**1000)) def test_train_dict(self): - - TRAINED_DICT = train_dict(SAMPLES, DICT_SIZE1) ZstdDict(TRAINED_DICT.dict_content, is_raw=False) @@ -1239,18 +1253,37 @@ def test_train_dict_c(self): # argument wrong type with self.assertRaises(TypeError): _zstd.train_dict({}, (), 100) + with self.assertRaises(TypeError): + _zstd.train_dict(bytearray(), (), 100) with self.assertRaises(TypeError): _zstd.train_dict(b'', 99, 100) + with self.assertRaises(TypeError): + _zstd.train_dict(b'', [], 100) with self.assertRaises(TypeError): _zstd.train_dict(b'', (), 100.1) + with self.assertRaises(TypeError): + _zstd.train_dict(b'', (99.1,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'abc', (4, -1), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'abc', (2,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (99,), 100) # size > size_t with self.assertRaises(ValueError): - _zstd.train_dict(b'', (2**64+1,), 100) + _zstd.train_dict(b'', (2**1000,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (-2**1000,), 100) # dict_size <= 0 with self.assertRaises(ValueError): _zstd.train_dict(b'', (), 0) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (), -1) + + with self.assertRaises(ZstdError): + _zstd.train_dict(b'', (), 1) def test_finalize_dict_c(self): with self.assertRaises(TypeError): @@ -1259,22 +1292,51 @@ def test_finalize_dict_c(self): # argument wrong type with self.assertRaises(TypeError): _zstd.finalize_dict({}, b'', (), 100, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(bytearray(TRAINED_DICT.dict_content), b'', (), 100, 5) with self.assertRaises(TypeError): _zstd.finalize_dict(TRAINED_DICT.dict_content, {}, (), 100, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, bytearray(), (), 100, 5) with self.assertRaises(TypeError): _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', 99, 100, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', [], 100, 5) with self.assertRaises(TypeError): _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100.1, 5) with self.assertRaises(TypeError): _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 5.1) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'abc', (4, -1), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'abc', (2,), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (99,), 100, 5) + # size > size_t with self.assertRaises(ValueError): - _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (2**64+1,), 100, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (2**1000,), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (-2**1000,), 100, 5) # dict_size <= 0 with self.assertRaises(ValueError): _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 0, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), -1, 5) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 2**1000, 5) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), -2**1000, 5) + + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 2**1000) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, -2**1000) + + with self.assertRaises(ZstdError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 5) def test_train_buffer_protocol_samples(self): def _nbytes(dat): diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 986b3579479f0f..b0e50f873f4ca6 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -7,7 +7,6 @@ #include "Python.h" #include "_zstdmodule.h" -#include "zstddict.h" #include // ZSTD_*() #include // ZDICT_*() @@ -20,14 +19,52 @@ module _zstd #include "clinic/_zstdmodule.c.h" +ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype) +{ + if (state == NULL) { + return NULL; + } + + /* Check ZstdDict */ + if (PyObject_TypeCheck(dict, state->ZstdDict_type)) { + return (ZstdDict*)dict; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2 + && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type) + && PyLong_Check(PyTuple_GET_ITEM(dict, 1))) + { + int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == -1 && PyErr_Occurred()) { + return NULL; + } + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) + { + *ptype = type; + return (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be a ZstdDict object."); + return NULL; +} + /* Format error message and set ZstdError. */ void -set_zstd_error(const _zstd_state* const state, - error_type type, size_t zstd_ret) +set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) { - char *msg; + const char *msg; assert(ZSTD_isError(zstd_ret)); + if (state == NULL) { + return; + } switch (type) { case ERR_DECOMPRESS: msg = "Unable to decompress Zstandard data: %s"; @@ -174,7 +211,7 @@ calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, Py_ssize_t sizes_sum; Py_ssize_t i; - chunks_number = Py_SIZE(samples_sizes); + chunks_number = PyTuple_GET_SIZE(samples_sizes); if ((size_t) chunks_number > UINT32_MAX) { PyErr_Format(PyExc_ValueError, "The number of samples should be <= %u.", UINT32_MAX); @@ -188,20 +225,24 @@ calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, return -1; } - sizes_sum = 0; + sizes_sum = PyBytes_GET_SIZE(samples_bytes); for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - (*chunk_sizes)[i] = PyLong_AsSize_t(size); - if ((*chunk_sizes)[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); + size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i)); + (*chunk_sizes)[i] = size; + if (size == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto sum_error; + } return -1; } - sizes_sum += (*chunk_sizes)[i]; + if ((size_t)sizes_sum < size) { + goto sum_error; + } + sizes_sum -= size; } - if (sizes_sum != Py_SIZE(samples_bytes)) { + if (sizes_sum != 0) { +sum_error: PyErr_SetString(PyExc_ValueError, "The samples size tuple doesn't match the " "concatenation's size."); @@ -257,7 +298,7 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Train the dictionary */ char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); - char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + const char *samples_buffer = PyBytes_AS_STRING(samples_bytes); Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, samples_buffer, @@ -507,17 +548,10 @@ _zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, { _zstd_state* mod_state = get_zstd_state(module); - if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { - PyErr_SetString(PyExc_ValueError, - "The two arguments should be CompressionParameter and " - "DecompressionParameter types."); - return NULL; - } - - Py_XSETREF( - mod_state->CParameter_type, (PyTypeObject*)Py_NewRef(c_parameter_type)); - Py_XSETREF( - mod_state->DParameter_type, (PyTypeObject*)Py_NewRef(d_parameter_type)); + Py_INCREF(c_parameter_type); + Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type); + Py_INCREF(d_parameter_type); + Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type); Py_RETURN_NONE; } @@ -580,7 +614,6 @@ do { \ return -1; } if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { - Py_DECREF(mod_state->ZstdError); return -1; } diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 1f4160f474f0b0..c73f15b3c5299b 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -5,6 +5,8 @@ #ifndef ZSTD_MODULE_H #define ZSTD_MODULE_H +#include "zstddict.h" + /* Type specs */ extern PyType_Spec zstd_dict_type_spec; extern PyType_Spec zstd_compressor_type_spec; @@ -43,10 +45,14 @@ typedef enum { DICT_TYPE_PREFIX = 2 } dictionary_type; +extern ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, + PyObject *dict, int *type); + /* Format error message and set ZstdError. */ extern void -set_zstd_error(const _zstd_state* const state, - const error_type type, size_t zstd_ret); +set_zstd_error(const _zstd_state *state, + error_type type, size_t zstd_ret); extern void set_parameter_error(int is_compress, int key_v, int value_v); diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 8ff2a3aadc1cd6..e1217635f60cb0 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -16,7 +16,6 @@ class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" -#include "zstddict.h" #include "internal/pycore_lock.h" // PyMutex_IsLocked #include // offsetof() @@ -71,9 +70,6 @@ _zstd_set_c_level(ZstdCompressor *self, int level) /* Check error */ if (ZSTD_isError(zstd_ret)) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); return -1; } @@ -265,56 +261,17 @@ static int _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { + /* When compressing, use undigested dictionary by default. */ + int type = DICT_TYPE_UNDIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { return -1; } - else if (ret > 0) { - /* When compressing, use undigested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_UNDIGESTED; - PyMutex_Lock(&zd->lock); - ret = _zstd_load_impl(self, zd, mod_state, type); - PyMutex_Unlock(&zd->lock); - return ret; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED - || type == DICT_TYPE_UNDIGESTED - || type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - PyMutex_Lock(&zd->lock); - ret = _zstd_load_impl(self, zd, mod_state, type); - PyMutex_Unlock(&zd->lock); - return ret; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } /*[clinic input] @@ -481,9 +438,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data, /* Check error */ if (ZSTD_isError(zstd_ret)) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } @@ -553,9 +508,7 @@ compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) /* Check error */ if (ZSTD_isError(zstd_ret)) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 26e568cf433308..c53d6e4cb05cf0 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -16,7 +16,6 @@ class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" -#include "zstddict.h" #include "internal/pycore_lock.h" // PyMutex_IsLocked #include // bool @@ -61,11 +60,6 @@ _get_DDict(ZstdDict *self) assert(PyMutex_IsLocked(&self->lock)); ZSTD_DDict *ret; - /* Already created */ - if (self->d_dict != NULL) { - return self->d_dict; - } - if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ Py_BEGIN_ALLOW_THREADS @@ -182,56 +176,17 @@ static int _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { + /* When decompressing, use digested dictionary by default. */ + int type = DICT_TYPE_DIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { return -1; } - else if (ret > 0) { - /* When decompressing, use digested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_DIGESTED; - PyMutex_Lock(&zd->lock); - ret = _zstd_load_impl(self, zd, mod_state, type); - PyMutex_Unlock(&zd->lock); - return ret; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED - || type == DICT_TYPE_UNDIGESTED - || type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - PyMutex_Lock(&zd->lock); - ret = _zstd_load_impl(self, zd, mod_state, type); - PyMutex_Unlock(&zd->lock); - return ret; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; } /* @@ -282,9 +237,7 @@ decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, /* Check error */ if (ZSTD_isError(zstd_ret)) { _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); - } + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); goto error; } diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index afc58b42e893d3..14f74aaed46ec5 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -15,7 +15,6 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" #include "Python.h" #include "_zstdmodule.h" -#include "zstddict.h" #include "clinic/zstddict.c.h" #include "internal/pycore_lock.h" // PyMutex_IsLocked From f806463e16428ea4b379bf547bafa11f43a480ef Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri <47737621+Andrea-Oliveri@users.noreply.github.com> Date: Sun, 1 Jun 2025 14:30:04 +0200 Subject: [PATCH 443/989] gh-134004: Added the reorganize() methods to dbm.sqlite, dbm.dumb and shelve (GH-134028) They are similar to the same named method in dbm.gnu. --- Doc/library/dbm.rst | 40 +++++++++++- Doc/library/shelve.rst | 16 ++++- Doc/whatsnew/3.15.rst | 17 ++++++ Lib/dbm/dumb.py | 32 +++++++++- Lib/dbm/sqlite3.py | 4 ++ Lib/shelve.py | 5 ++ Lib/test/test_dbm.py | 61 +++++++++++++++++++ Misc/ACKS | 1 + ...-05-15-00-27-09.gh-issue-134004.e8k4-R.rst | 2 + 9 files changed, 172 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 36221c026d6d4b..6f548fbb1b39d8 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -15,10 +15,16 @@ * :mod:`dbm.ndbm` If none of these modules are installed, the -slow-but-simple implementation in module :mod:`dbm.dumb` will be used. There +slow-but-simple implementation in module :mod:`dbm.dumb` will be used. There is a `third party interface `_ to the Oracle Berkeley DB. +.. note:: + None of the underlying modules will automatically shrink the disk space used by + the database file. However, :mod:`dbm.sqlite3`, :mod:`dbm.gnu` and :mod:`dbm.dumb` + provide a :meth:`!reorganize` method that can be used for this purpose. + + .. exception:: error A tuple containing the exceptions that can be raised by each of the supported @@ -186,6 +192,17 @@ or any other SQLite browser, including the SQLite CLI. The Unix file access mode of the file (default: octal ``0o666``), used only when the database has to be created. + .. method:: sqlite3.reorganize() + + If you have carried out a lot of deletions and would like to shrink the space + used on disk, this method will reorganize the database; otherwise, deleted file + space will be kept and reused as new (key, value) pairs are added. + + .. note:: + While reorganizing, as much as two times the size of the original database is required + in free disk space. However, be aware that this factor changes for each :mod:`dbm` submodule. + + .. versionadded:: next :mod:`dbm.gnu` --- GNU database manager --------------------------------------- @@ -284,6 +301,10 @@ functionality like crash tolerance. reorganization; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. + .. note:: + While reorganizing, as much as one time the size of the original database is required + in free disk space. However, be aware that this factor changes for each :mod:`dbm` submodule. + .. method:: gdbm.sync() When the database has been opened in fast mode, this method forces any @@ -438,6 +459,11 @@ The :mod:`!dbm.dumb` module defines the following: with a sufficiently large/complex entry due to stack depth limitations in Python's AST compiler. + .. warning:: + :mod:`dbm.dumb` does not support concurrent read/write access. (Multiple + simultaneous read accesses are safe.) When a program has the database open + for writing, no other program should have it open for reading or writing. + .. versionchanged:: 3.5 :func:`~dbm.dumb.open` always creates a new database when *flag* is ``'n'``. @@ -460,3 +486,15 @@ The :mod:`!dbm.dumb` module defines the following: .. method:: dumbdbm.close() Close the database. + + .. method:: dumbdbm.reorganize() + + If you have carried out a lot of deletions and would like to shrink the space + used on disk, this method will reorganize the database; otherwise, deleted file + space will not be reused. + + .. note:: + While reorganizing, no additional free disk space is required. However, be aware + that this factor changes for each :mod:`dbm` submodule. + + .. versionadded:: next diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 6e74a59b82b8ec..23a2e0c3d0c758 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -75,8 +75,15 @@ Two additional methods are supported: Write back all entries in the cache if the shelf was opened with *writeback* set to :const:`True`. Also empty the cache and synchronize the persistent - dictionary on disk, if feasible. This is called automatically when the shelf - is closed with :meth:`close`. + dictionary on disk, if feasible. This is called automatically when + :meth:`reorganize` is called or the shelf is closed with :meth:`close`. + +.. method:: Shelf.reorganize() + + Calls :meth:`sync` and attempts to shrink space used on disk by removing empty + space resulting from deletions. + + .. versionadded:: next .. method:: Shelf.close() @@ -116,6 +123,11 @@ Restrictions * On macOS :mod:`dbm.ndbm` can silently corrupt the database file on updates, which can cause hard crashes when trying to read from the database. +* :meth:`Shelf.reorganize` may not be available for all database packages and + may temporarely increase resource usage (especially disk space) when called. + Additionally, it will never run automatically and instead needs to be called + explicitly. + .. class:: Shelf(dict, protocol=None, writeback=False, keyencoding='utf-8') diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 6d1f653f086a15..244ce327763f57 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -89,6 +89,14 @@ New modules Improved modules ================ +dbm +--- + +* Added new :meth:`!reorganize` methods to :mod:`dbm.dumb` and :mod:`dbm.sqlite3` + which allow to recover unused free space previously occupied by deleted entries. + (Contributed by Andrea Oliveri in :gh:`134004`.) + + difflib ------- @@ -96,6 +104,15 @@ difflib class, and migrated the output to the HTML5 standard. (Contributed by Jiahao Li in :gh:`134580`.) + +shelve +------ + +* Added new :meth:`!reorganize` method to :mod:`shelve` used to recover unused free + space previously occupied by deleted entries. + (Contributed by Andrea Oliveri in :gh:`134004`.) + + ssl --- diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index def120ffc3778b..1bc239a84fff83 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -9,7 +9,7 @@ - seems to contain a bug when updating... - reclaim free space (currently, space once occupied by deleted or expanded -items is never reused) +items is not reused exept if .reorganize() is called) - support concurrent access (currently, if two processes take turns making updates, they can mess up the index) @@ -17,8 +17,6 @@ - support efficient access to large databases (currently, the whole index is read when the database is opened, and some updates rewrite the whole index) -- support opening for read-only (flag = 'm') - """ import ast as _ast @@ -289,6 +287,34 @@ def __enter__(self): def __exit__(self, *args): self.close() + def reorganize(self): + if self._readonly: + raise error('The database is opened for reading only') + self._verify_open() + # Ensure all changes are committed before reorganizing. + self._commit() + # Open file in r+ to allow changing in-place. + with _io.open(self._datfile, 'rb+') as f: + reorganize_pos = 0 + + # Iterate over existing keys, sorted by starting byte. + for key in sorted(self._index, key = lambda k: self._index[k][0]): + pos, siz = self._index[key] + f.seek(pos) + val = f.read(siz) + + f.seek(reorganize_pos) + f.write(val) + self._index[key] = (reorganize_pos, siz) + + blocks_occupied = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE + reorganize_pos += blocks_occupied * _BLOCKSIZE + + f.truncate(reorganize_pos) + # Commit changes to index, which were not in-place. + self._commit() + + def open(file, flag='c', mode=0o666): """Open the database file, filename, and return corresponding object. diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py index 7e0ae2a29e3a64..b296a1bcd1bbfa 100644 --- a/Lib/dbm/sqlite3.py +++ b/Lib/dbm/sqlite3.py @@ -15,6 +15,7 @@ STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)" ITER_KEYS = "SELECT key FROM Dict" +REORGANIZE = "VACUUM" class error(OSError): @@ -122,6 +123,9 @@ def __enter__(self): def __exit__(self, *args): self.close() + def reorganize(self): + self._execute(REORGANIZE) + def open(filename, /, flag="r", mode=0o666): """Open a dbm.sqlite3 database and return the dbm object. diff --git a/Lib/shelve.py b/Lib/shelve.py index 50584716e9ea64..b53dc8b7a8ece9 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -171,6 +171,11 @@ def sync(self): if hasattr(self.dict, 'sync'): self.dict.sync() + def reorganize(self): + self.sync() + if hasattr(self.dict, 'reorganize'): + self.dict.reorganize() + class BsdDbShelf(Shelf): """Shelf implementation using the "BSD" db interface. diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index a10922a403ec38..7e8d78b89405ab 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -135,6 +135,67 @@ def test_anydbm_access(self): assert(f[key] == b"Python:") f.close() + def test_anydbm_readonly_reorganize(self): + self.init_db() + with dbm.open(_fname, 'r') as d: + # Early stopping. + if not hasattr(d, 'reorganize'): + self.skipTest("method reorganize not available this dbm submodule") + + self.assertRaises(dbm.error, lambda: d.reorganize()) + + def test_anydbm_reorganize_not_changed_content(self): + self.init_db() + with dbm.open(_fname, 'c') as d: + # Early stopping. + if not hasattr(d, 'reorganize'): + self.skipTest("method reorganize not available this dbm submodule") + + keys_before = sorted(d.keys()) + values_before = [d[k] for k in keys_before] + d.reorganize() + keys_after = sorted(d.keys()) + values_after = [d[k] for k in keys_before] + self.assertEqual(keys_before, keys_after) + self.assertEqual(values_before, values_after) + + def test_anydbm_reorganize_decreased_size(self): + + def _calculate_db_size(db_path): + if os.path.isfile(db_path): + return os.path.getsize(db_path) + total_size = 0 + for root, _, filenames in os.walk(db_path): + for filename in filenames: + file_path = os.path.join(root, filename) + total_size += os.path.getsize(file_path) + return total_size + + # This test requires relatively large databases to reliably show difference in size before and after reorganizing. + with dbm.open(_fname, 'n') as f: + # Early stopping. + if not hasattr(f, 'reorganize'): + self.skipTest("method reorganize not available this dbm submodule") + + for k in self._dict: + f[k.encode('ascii')] = self._dict[k] * 100000 + db_keys = list(f.keys()) + + # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. + size_before = _calculate_db_size(os.path.dirname(_fname)) + + # Delete some elements from the start of the database. + keys_to_delete = db_keys[:len(db_keys) // 2] + with dbm.open(_fname, 'c') as f: + for k in keys_to_delete: + del f[k] + f.reorganize() + + # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. + size_after = _calculate_db_size(os.path.dirname(_fname)) + + self.assertLess(size_after, size_before) + def test_open_with_bytes(self): dbm.open(os.fsencode(_fname), "c").close() diff --git a/Misc/ACKS b/Misc/ACKS index 571142e7e49763..2435943f1bb2bd 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1365,6 +1365,7 @@ Milan Oberkirch Pascal Oberndoerfer Géry Ogam Seonkyo Ok +Andrea Oliveri Jeffrey Ollie Adam Olsen Bryan Olson diff --git a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst new file mode 100644 index 00000000000000..a9a56d9239b305 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst @@ -0,0 +1,2 @@ +:mod:`shelve` as well as underlying :mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!reorganize` methods to +recover unused free space previously occupied by deleted entries. From ac7511062bf8e16ad489b17990d99abd3b4351f5 Mon Sep 17 00:00:00 2001 From: Sam Ng Date: Sun, 1 Jun 2025 08:35:58 -0700 Subject: [PATCH 444/989] gh-130478: fix HACL* build for macOS Silicon (#134188) --- configure | 12 ++++++++++-- configure.ac | 13 +++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/configure b/configure index c9ce9b4a73527d..b0dc18d5cea749 100755 --- a/configure +++ b/configure @@ -32557,6 +32557,14 @@ LIBHACL_CFLAGS="${LIBHACL_FLAG_I} ${LIBHACL_FLAG_D} \$(PY_STDMODULE_CFLAGS) \$(C LIBHACL_LDFLAGS= # for now, no specific linker flags are needed +if test "$UNIVERSAL_ARCHS" = "universal2" -o \ + \( "$build_cpu" = "aarch64" -a "$build_vendor" = "apple" \) +then + use_hacl_universal2_impl=yes +else + use_hacl_universal2_impl=no +fi + # The SIMD files use aligned_alloc, which is not available on older versions of # Android. # The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. @@ -32610,7 +32618,7 @@ printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h # isn't great, so it's disabled on ARM64. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HACL* SIMD128 implementation" >&5 printf %s "checking for HACL* SIMD128 implementation... " >&6; } - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128_universal2.o" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: universal2" >&5 printf "%s\n" "universal2" >&6; } @@ -32687,7 +32695,7 @@ printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h # wrapped implementation if we're building for universal2. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HACL* SIMD256 implementation" >&5 printf %s "checking for HACL* SIMD256 implementation... " >&6; } - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256_universal2.o" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: universal2" >&5 printf "%s\n" "universal2" >&6; } diff --git a/configure.ac b/configure.ac index eb52365d95be21..70ad6da86719c6 100644 --- a/configure.ac +++ b/configure.ac @@ -8001,6 +8001,15 @@ AC_SUBST([LIBHACL_CFLAGS]) LIBHACL_LDFLAGS= # for now, no specific linker flags are needed AC_SUBST([LIBHACL_LDFLAGS]) +dnl Check if universal2 HACL* implementation should be used. +if test "$UNIVERSAL_ARCHS" = "universal2" -o \ + \( "$build_cpu" = "aarch64" -a "$build_vendor" = "apple" \) +then + use_hacl_universal2_impl=yes +else + use_hacl_universal2_impl=no +fi + # The SIMD files use aligned_alloc, which is not available on older versions of # Android. # The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. @@ -8017,7 +8026,7 @@ then # available on x86_64. However, performance of the HACL SIMD128 implementation # isn't great, so it's disabled on ARM64. AC_MSG_CHECKING([for HACL* SIMD128 implementation]) - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then [LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128_universal2.o"] AC_MSG_RESULT([universal2]) else @@ -8049,7 +8058,7 @@ then # implementation requires symbols that aren't available on ARM64. Use a # wrapped implementation if we're building for universal2. AC_MSG_CHECKING([for HACL* SIMD256 implementation]) - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then [LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256_universal2.o"] AC_MSG_RESULT([universal2]) else From ee65ebdb50005655d75aca1618d3994a7b7ed869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Jun 2025 10:25:50 +0200 Subject: [PATCH 445/989] gh-134978: deprecate `string` keyword parameter for hash function constructors (#134979) --- Doc/deprecations/pending-removal-in-3.19.rst | 16 +++++++++++++++ Doc/library/hashlib.rst | 7 +++++++ Doc/whatsnew/3.15.rst | 16 +++++++++++++-- Lib/test/test_hashlib.py | 20 ++++++++++++++++--- ...-05-31-15-49-46.gh-issue-134978.mXXuvW.rst | 7 +++++++ Modules/hashlib.h | 9 +++++++++ 6 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-31-15-49-46.gh-issue-134978.mXXuvW.rst diff --git a/Doc/deprecations/pending-removal-in-3.19.rst b/Doc/deprecations/pending-removal-in-3.19.rst index 3936f63ca5b5af..25f9cba390de68 100644 --- a/Doc/deprecations/pending-removal-in-3.19.rst +++ b/Doc/deprecations/pending-removal-in-3.19.rst @@ -6,3 +6,19 @@ Pending removal in Python 3.19 * Implicitly switching to the MSVC-compatible struct layout by setting :attr:`~ctypes.Structure._pack_` but not :attr:`~ctypes.Structure._layout_` on non-Windows platforms. + +* :mod:`hashlib`: + + - In hash function constructors such as :func:`~hashlib.new` or the + direct hash-named constructors such as :func:`~hashlib.md5` and + :func:`~hashlib.sha256`, their optional initial data parameter could + also be passed a keyword argument named ``data=`` or ``string=`` in + various :mod:`!hashlib` implementations. + + Support for the ``string`` keyword argument name is now deprecated + and slated for removal in Python 3.19. + + Before Python 3.13, the ``string`` keyword parameter was not correctly + supported depending on the backend implementation of hash functions. + Prefer passing the initial data as a positional argument for maximum + backwards compatibility. diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index 4818a4944a512a..8bba6700930bf4 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -94,6 +94,13 @@ accessible by name via :func:`new`. See :data:`algorithms_available`. OpenSSL does not provide we fall back to a verified implementation from the `HACL\* project`_. +.. deprecated-removed:: 3.15 3.19 + The undocumented ``string`` keyword parameter in :func:`!_hashlib.new` + and hash-named constructors such as :func:`!_md5.md5` is deprecated. + Prefer passing the initial data as a positional argument for maximum + backwards compatibility. + + Usage ----- diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 244ce327763f57..b342939f70577f 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -146,8 +146,20 @@ module_name Deprecated ========== -* module_name: - TODO +hashlib +------- + +* In hash function constructors such as :func:`~hashlib.new` or the + direct hash-named constructors such as :func:`~hashlib.md5` and + :func:`~hashlib.sha256`, their optional initial data parameter could + also be passed a keyword argument named ``data=`` or ``string=`` in + various :mod:`hashlib` implementations. + + Support for the ``string`` keyword argument name is now deprecated and + is slated for removal in Python 3.19. Prefer passing the initial data as + a positional argument for maximum backwards compatibility. + + (Contributed by Bénédikt Tran in :gh:`134978`.) .. Add deprecations above alphabetically, not here at the end. diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 8244f7c7553a37..b83ae181718b7a 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -98,6 +98,14 @@ def read_vectors(hash_name): yield parts +DEPRECATED_STRING_PARAMETER = re.escape( + "the 'string' keyword parameter is deprecated since " + "Python 3.15 and slated for removal in Python 3.19; " + "use the 'data' keyword parameter or pass the data " + "to hash as a positional argument instead" +) + + class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', @@ -255,17 +263,23 @@ def test_clinic_signature(self): with self.subTest(constructor.__name__): constructor(b'') constructor(data=b'') - constructor(string=b'') # should be deprecated in the future + with self.assertWarnsRegex(DeprecationWarning, + DEPRECATED_STRING_PARAMETER): + constructor(string=b'') digest_name = constructor(b'').name with self.subTest(digest_name): hashlib.new(digest_name, b'') hashlib.new(digest_name, data=b'') - hashlib.new(digest_name, string=b'') + with self.assertWarnsRegex(DeprecationWarning, + DEPRECATED_STRING_PARAMETER): + hashlib.new(digest_name, string=b'') if self._hashlib: self._hashlib.new(digest_name, b'') self._hashlib.new(digest_name, data=b'') - self._hashlib.new(digest_name, string=b'') + with self.assertWarnsRegex(DeprecationWarning, + DEPRECATED_STRING_PARAMETER): + self._hashlib.new(digest_name, string=b'') @unittest.skipIf(get_fips_mode(), "skip in FIPS mode") def test_clinic_signature_errors(self): diff --git a/Misc/NEWS.d/next/Library/2025-05-31-15-49-46.gh-issue-134978.mXXuvW.rst b/Misc/NEWS.d/next/Library/2025-05-31-15-49-46.gh-issue-134978.mXXuvW.rst new file mode 100644 index 00000000000000..e75ce1622d6396 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-31-15-49-46.gh-issue-134978.mXXuvW.rst @@ -0,0 +1,7 @@ +:mod:`hashlib`: Supporting the ``string`` keyword parameter in hash function +constructors such as :func:`~hashlib.new` or the direct hash-named constructors +such as :func:`~hashlib.md5` and :func:`~hashlib.sha256` is now deprecated and +slated for removal in Python 3.19. +Prefer passing the initial data as a positional argument for maximum backwards +compatibility. +Patch by Bénédikt Tran. diff --git a/Modules/hashlib.h b/Modules/hashlib.h index a80b195a765792..e82ec92be25c57 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -86,6 +86,15 @@ _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) } else if (data == NULL && string != NULL) { // called as H(string=...) + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "the 'string' keyword parameter is deprecated since " + "Python 3.15 and slated for removal in Python 3.19; " + "use the 'data' keyword parameter or pass the data " + "to hash as a positional argument instead", 1) < 0) + { + *res = NULL; + return -1; + } *res = string; return 1; } From 7828d52680907d1661ff6993e540f7026461c390 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 2 Jun 2025 12:45:48 +0300 Subject: [PATCH 446/989] gh-134449: fix grammar for `precision_with_grouping` in format description (#134608) This amends commit f39a07be47cd9219eaf0e538ae32ad8239c88e66. --- Doc/library/string.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index c4012483a52f73..23e15780075435 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -328,7 +328,7 @@ The general form of a *standard format specifier* is: sign: "+" | "-" | " " width_and_precision: [`width_with_grouping`][`precision_with_grouping`] width_with_grouping: [`width`][`grouping`] - precision_with_grouping: "." [`precision`][`grouping`] + precision_with_grouping: "." [`precision`][`grouping`] | "." `grouping` width: `~python-grammar:digit`+ precision: `~python-grammar:digit`+ grouping: "," | "_" From 128195e12eb6d5b9542558453df7045dd7aa1e15 Mon Sep 17 00:00:00 2001 From: GalaxySnail Date: Mon, 2 Jun 2025 18:23:52 +0800 Subject: [PATCH 447/989] gh-122153: indicate that Windows does not support `socket.{send,recv}_fds` (#134960) This amends commit e3b6ff19aaa318a813130ba9ad2ab0a332f27feb. --- Doc/library/socket.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 75fd637045d7d8..bc89a3228f0ed9 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -1492,7 +1492,7 @@ The :mod:`socket` module also offers various network-related services: The *fds* parameter is a sequence of file descriptors. Consult :meth:`~socket.sendmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. Unix platforms supporting :meth:`~socket.sendmsg` and :const:`SCM_RIGHTS` mechanism. @@ -1506,9 +1506,9 @@ The :mod:`socket` module also offers various network-related services: Return ``(msg, list(fds), flags, addr)``. Consult :meth:`~socket.recvmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. - Unix platforms supporting :meth:`~socket.sendmsg` + Unix platforms supporting :meth:`~socket.recvmsg` and :const:`SCM_RIGHTS` mechanism. .. versionadded:: 3.9 From 5f61cde80a9b33c8e118b1c009fe2aaa4bb87356 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 2 Jun 2025 13:38:05 +0300 Subject: [PATCH 448/989] gh-132908: Add math.isnormal/issubnormal() functions (GH132935) --- Doc/library/math.rst | 20 ++++++ Doc/whatsnew/3.14.rst | 2 +- Doc/whatsnew/3.15.rst | 7 ++ Lib/test/test_math.py | 22 ++++++ ...-04-25-16-06-53.gh-issue-132908.wV5rja.rst | 2 + Modules/clinic/mathmodule.c.h | 70 ++++++++++++++++++- Modules/mathmodule.c | 40 +++++++++++ 7 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-25-16-06-53.gh-issue-132908.wV5rja.rst diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 11d3b756e21322..c8061fb16380cd 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -53,6 +53,8 @@ noted otherwise, all return values are floats. :func:`frexp(x) ` Mantissa and exponent of *x* :func:`isclose(a, b, rel_tol, abs_tol) ` Check if the values *a* and *b* are close to each other :func:`isfinite(x) ` Check if *x* is neither an infinity nor a NaN +:func:`isnormal(x) ` Check if *x* is a normal number +:func:`issubnormal(x) ` Check if *x* is a subnormal number :func:`isinf(x) ` Check if *x* is a positive or negative infinity :func:`isnan(x) ` Check if *x* is a NaN (not a number) :func:`ldexp(x, i) ` ``x * (2**i)``, inverse of function :func:`frexp` @@ -373,6 +375,24 @@ Floating point manipulation functions .. versionadded:: 3.2 +.. function:: isnormal(x) + + Return ``True`` if *x* is a normal number, that is a finite + nonzero number that is not a subnormal (see :func:`issubnormal`). + Return ``False`` otherwise. + + .. versionadded:: next + + +.. function:: issubnormal(x) + + Return ``True`` if *x* is a subnormal number, that is a finite + nonzero number with a magnitude smaller than the smallest positive normal + number, see :data:`sys.float_info.min`. Return ``False`` otherwise. + + .. versionadded:: next + + .. function:: isinf(x) Return ``True`` if *x* is a positive or negative infinity, and diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 561d1a8914b50c..27dfc75c90fbe9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1454,7 +1454,7 @@ math ---- * Added more detailed error messages for domain errors in the module. - (Contributed by by Charlie Zhao and Sergey B Kirpichev in :gh:`101410`.) + (Contributed by Charlie Zhao and Sergey B Kirpichev in :gh:`101410`.) mimetypes diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index b342939f70577f..a27a17afdba2a8 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -105,6 +105,13 @@ difflib (Contributed by Jiahao Li in :gh:`134580`.) +math +---- + +* Add :func:`math.isnormal` and :func:`math.issubnormal` functions. + (Contributed by Sergey B Kirpichev in :gh:`132908`.) + + shelve ------ diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index d14336f8bac498..384ad5c828d9b3 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1973,6 +1973,28 @@ def testIsfinite(self): self.assertFalse(math.isfinite(float("inf"))) self.assertFalse(math.isfinite(float("-inf"))) + def testIsnormal(self): + self.assertTrue(math.isnormal(1.25)) + self.assertTrue(math.isnormal(-1.0)) + self.assertFalse(math.isnormal(0.0)) + self.assertFalse(math.isnormal(-0.0)) + self.assertFalse(math.isnormal(INF)) + self.assertFalse(math.isnormal(NINF)) + self.assertFalse(math.isnormal(NAN)) + self.assertFalse(math.isnormal(FLOAT_MIN/2)) + self.assertFalse(math.isnormal(-FLOAT_MIN/2)) + + def testIssubnormal(self): + self.assertFalse(math.issubnormal(1.25)) + self.assertFalse(math.issubnormal(-1.0)) + self.assertFalse(math.issubnormal(0.0)) + self.assertFalse(math.issubnormal(-0.0)) + self.assertFalse(math.issubnormal(INF)) + self.assertFalse(math.issubnormal(NINF)) + self.assertFalse(math.issubnormal(NAN)) + self.assertTrue(math.issubnormal(FLOAT_MIN/2)) + self.assertTrue(math.issubnormal(-FLOAT_MIN/2)) + def testIsnan(self): self.assertTrue(math.isnan(float("nan"))) self.assertTrue(math.isnan(float("-nan"))) diff --git a/Misc/NEWS.d/next/Library/2025-04-25-16-06-53.gh-issue-132908.wV5rja.rst b/Misc/NEWS.d/next/Library/2025-04-25-16-06-53.gh-issue-132908.wV5rja.rst new file mode 100644 index 00000000000000..e33b061bb9ba1f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-25-16-06-53.gh-issue-132908.wV5rja.rst @@ -0,0 +1,2 @@ +Add :func:`math.isnormal` and :func:`math.issubnormal` functions. Patch by +Sergey B Kirpichev. diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 9df73b187bb827..fbb012fb6dd9e1 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -628,6 +628,74 @@ math_isfinite(PyObject *module, PyObject *arg) return return_value; } +PyDoc_STRVAR(math_isnormal__doc__, +"isnormal($module, x, /)\n" +"--\n" +"\n" +"Return True if x is normal, and False otherwise."); + +#define MATH_ISNORMAL_METHODDEF \ + {"isnormal", (PyCFunction)math_isnormal, METH_O, math_isnormal__doc__}, + +static PyObject * +math_isnormal_impl(PyObject *module, double x); + +static PyObject * +math_isnormal(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + double x; + + if (PyFloat_CheckExact(arg)) { + x = PyFloat_AS_DOUBLE(arg); + } + else + { + x = PyFloat_AsDouble(arg); + if (x == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = math_isnormal_impl(module, x); + +exit: + return return_value; +} + +PyDoc_STRVAR(math_issubnormal__doc__, +"issubnormal($module, x, /)\n" +"--\n" +"\n" +"Return True if x is subnormal, and False otherwise."); + +#define MATH_ISSUBNORMAL_METHODDEF \ + {"issubnormal", (PyCFunction)math_issubnormal, METH_O, math_issubnormal__doc__}, + +static PyObject * +math_issubnormal_impl(PyObject *module, double x); + +static PyObject * +math_issubnormal(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + double x; + + if (PyFloat_CheckExact(arg)) { + x = PyFloat_AS_DOUBLE(arg); + } + else + { + x = PyFloat_AsDouble(arg); + if (x == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = math_issubnormal_impl(module, x); + +exit: + return return_value; +} + PyDoc_STRVAR(math_isnan__doc__, "isnan($module, x, /)\n" "--\n" @@ -1110,4 +1178,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=77e7b8c161c39843 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=44bba3a0a052a364 input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 71d9c1387f5780..bbbb49115681de 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3118,6 +3118,44 @@ math_isfinite_impl(PyObject *module, double x) } +/*[clinic input] +math.isnormal + + x: double + / + +Return True if x is normal, and False otherwise. +[clinic start generated code]*/ + +static PyObject * +math_isnormal_impl(PyObject *module, double x) +/*[clinic end generated code: output=c7b302b5b89c3541 input=fdaa00c58aa7bc17]*/ +{ + return PyBool_FromLong(isnormal(x)); +} + + +/*[clinic input] +math.issubnormal + + x: double + / + +Return True if x is subnormal, and False otherwise. +[clinic start generated code]*/ + +static PyObject * +math_issubnormal_impl(PyObject *module, double x) +/*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/ +{ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L + return PyBool_FromLong(issubnormal(x)); +#else + return PyBool_FromLong(isfinite(x) && x && !isnormal(x)); +#endif +} + + /*[clinic input] math.isnan @@ -4145,6 +4183,8 @@ static PyMethodDef math_methods[] = { MATH_HYPOT_METHODDEF MATH_ISCLOSE_METHODDEF MATH_ISFINITE_METHODDEF + MATH_ISNORMAL_METHODDEF + MATH_ISSUBNORMAL_METHODDEF MATH_ISINF_METHODDEF MATH_ISNAN_METHODDEF MATH_ISQRT_METHODDEF From baccfdb3d4d004cfb5308674e5e6ea6e598abcd7 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Mon, 2 Jun 2025 08:04:59 -0300 Subject: [PATCH 449/989] gh-130999: Avoid exiting the new REPL when there are non-string candidates for suggestions (gh-131001) --- Lib/test/test_pyrepl/test_pyrepl.py | 11 +++++++++ Lib/test/test_traceback.py | 24 +++++++++++++++++++ Lib/traceback.py | 13 ++++++++-- ...-03-09-03-13-41.gh-issue-130999.tBRBVB.rst | 2 ++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-03-09-03-13-41.gh-issue-130999.tBRBVB.rst diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index aa3a592766d6d1..98bae7dd703fd9 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1672,6 +1672,17 @@ def test_null_byte(self): self.assertEqual(exit_code, 0) self.assertNotIn("TypeError", output) + @force_not_colorized + def test_non_string_suggestion_candidates(self): + commands = ("import runpy\n" + "runpy._run_module_code('blech', {0: '', 'bluch': ''}, '')\n" + "exit()\n") + + output, exit_code = self.run_repl(commands) + self.assertEqual(exit_code, 0) + self.assertNotIn("all elements in 'candidates' must be strings", output) + self.assertIn("bluch", output) + def test_readline_history_file(self): # skip, if readline module is not available readline = import_module('readline') diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index b9be87f357ffdd..6b2271f5d5ba8d 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -4188,6 +4188,15 @@ def __dir__(self): self.assertNotIn("blech", actual) self.assertNotIn("oh no!", actual) + def test_attribute_error_with_non_string_candidates(self): + class T: + bluch = 1 + + instance = T() + instance.__dict__[0] = 1 + actual = self.get_suggestion(instance, 'blich') + self.assertIn("bluch", actual) + def test_attribute_error_with_bad_name(self): def raise_attribute_error_with_bad_name(): raise AttributeError(name=12, obj=23) @@ -4301,6 +4310,13 @@ def test_import_from_suggestions_underscored(self): self.assertIn("'_bluch'", self.get_import_from_suggestion(code, '_luch')) self.assertNotIn("'_bluch'", self.get_import_from_suggestion(code, 'bluch')) + def test_import_from_suggestions_non_string(self): + modWithNonStringAttr = textwrap.dedent("""\ + globals()[0] = 1 + bluch = 1 + """) + self.assertIn("'bluch'", self.get_import_from_suggestion(modWithNonStringAttr, 'blech')) + def test_import_from_suggestions_do_not_trigger_for_long_attributes(self): code = "blech = None" @@ -4397,6 +4413,14 @@ def func(): actual = self.get_suggestion(func) self.assertIn("'ZeroDivisionError'?", actual) + def test_name_error_suggestions_with_non_string_candidates(self): + def func(): + abc = 1 + globals()[0] = 1 + abv + actual = self.get_suggestion(func) + self.assertIn("abc", actual) + def test_name_error_suggestions_do_not_trigger_for_long_names(self): def func(): somethingverywronghehehehehehe = None diff --git a/Lib/traceback.py b/Lib/traceback.py index 17b082eced6f05..a1f175dbbaa421 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -1595,7 +1595,11 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): if isinstance(exc_value, AttributeError): obj = exc_value.obj try: - d = dir(obj) + try: + d = dir(obj) + except TypeError: # Attributes are unsortable, e.g. int and str + d = list(obj.__class__.__dict__.keys()) + list(obj.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) hide_underscored = (wrong_name[:1] != '_') if hide_underscored and tb is not None: while tb.tb_next is not None: @@ -1610,7 +1614,11 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): elif isinstance(exc_value, ImportError): try: mod = __import__(exc_value.name) - d = dir(mod) + try: + d = dir(mod) + except TypeError: # Attributes are unsortable, e.g. int and str + d = list(mod.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) if wrong_name[:1] != '_': d = [x for x in d if x[:1] != '_'] except Exception: @@ -1628,6 +1636,7 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): + list(frame.f_globals) + list(frame.f_builtins) ) + d = [x for x in d if isinstance(x, str)] # Check first if we are in a method and the instance # has the wrong name as attribute diff --git a/Misc/NEWS.d/next/Library/2025-03-09-03-13-41.gh-issue-130999.tBRBVB.rst b/Misc/NEWS.d/next/Library/2025-03-09-03-13-41.gh-issue-130999.tBRBVB.rst new file mode 100644 index 00000000000000..157522f9aab1b6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-09-03-13-41.gh-issue-130999.tBRBVB.rst @@ -0,0 +1,2 @@ +Avoid exiting the new REPL and offer suggestions even if there are non-string +candidates when errors occur. From 5bc2d99126e40cb727a249939a4f905a51e8c920 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 2 Jun 2025 16:28:20 +0300 Subject: [PATCH 450/989] gh-130662: Accept leading zeros in precision/width for Fraction's formatting (#130663) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/fractions.py | 4 ++-- Lib/test/test_fractions.py | 8 ++------ .../2025-06-02-14-28-30.gh-issue-130662.EIgIR8.rst | 3 +++ 3 files changed, 7 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-02-14-28-30.gh-issue-130662.EIgIR8.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index 063f28478c7338..cb05ae7c2003f0 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -168,9 +168,9 @@ def _round_to_figures(n, d, figures): # A '0' that's *not* followed by another digit is parsed as a minimum width # rather than a zeropad flag. (?P0(?=[0-9]))? - (?P0|[1-9][0-9]*)? + (?P[0-9]+)? (?P[,_])? - (?:\.(?P0|[1-9][0-9]*))? + (?:\.(?P[0-9]+))? (?P[eEfFgG%]) """, re.DOTALL | re.VERBOSE).fullmatch diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 96b3f30519459b..d1d2739856c092 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1518,6 +1518,8 @@ def test_format_f_presentation_type(self): (F(51, 1000), '.1f', '0.1'), (F(149, 1000), '.1f', '0.1'), (F(151, 1000), '.1f', '0.2'), + (F(22, 7), '.02f', '3.14'), # issue gh-130662 + (F(22, 7), '005.02f', '03.14'), ] for fraction, spec, expected in testcases: with self.subTest(fraction=fraction, spec=spec): @@ -1616,12 +1618,6 @@ def test_invalid_formats(self): '=010%', '>00.2f', '>00f', - # Too many zeros - minimum width should not have leading zeros - '006f', - # Leading zeros in precision - '.010f', - '.02f', - '.000f', # Missing precision '.e', '.f', diff --git a/Misc/NEWS.d/next/Library/2025-06-02-14-28-30.gh-issue-130662.EIgIR8.rst b/Misc/NEWS.d/next/Library/2025-06-02-14-28-30.gh-issue-130662.EIgIR8.rst new file mode 100644 index 00000000000000..e07200f9a3fbbd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-02-14-28-30.gh-issue-130662.EIgIR8.rst @@ -0,0 +1,3 @@ +Accept leading zeros in precision and width fields for +:class:`~fractions.Fraction` formatting, for example ``format(Fraction(1, +3), '.016f')``. From 9c72658e49806eae52346a0905c1c176d3d49a2f Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 2 Jun 2025 16:30:52 +0300 Subject: [PATCH 451/989] gh-130662: Accept leading zeros in precision/width for Decimal's formatting (#132549) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/_pydecimal.py | 4 ++-- Lib/test/test_decimal.py | 2 ++ .../Library/2025-06-02-14-36-28.gh-issue-130662.Gpr2GB.rst | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-02-14-36-28.gh-issue-130662.Gpr2GB.rst diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 46fa9ffcb1e056..781b38ec26ba33 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -6120,9 +6120,9 @@ def _convert_for_comparison(self, other, equality_op=False): (?Pz)? (?P\#)? (?P0)? -(?P(?!0)\d+)? +(?P\d+)? (?P[,_])? -(?:\.(?P0|(?!0)\d+))? +(?:\.(?P\d+))? (?P[eEfFgGn%])? \z """, re.VERBOSE|re.DOTALL) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index c0a1e378583ba8..ef64b878805d77 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -981,6 +981,7 @@ def test_formatting(self): ('.0f', '0e-2', '0'), ('.0f', '3.14159265', '3'), ('.1f', '3.14159265', '3.1'), + ('.01f', '3.14159265', '3.1'), # leading zero in precision ('.4f', '3.14159265', '3.1416'), ('.6f', '3.14159265', '3.141593'), ('.7f', '3.14159265', '3.1415926'), # round-half-even! @@ -1066,6 +1067,7 @@ def test_formatting(self): ('8,', '123456', ' 123,456'), ('08,', '123456', '0,123,456'), # special case: extra 0 needed ('+08,', '123456', '+123,456'), # but not if there's a sign + ('008,', '123456', '0,123,456'), # leading zero in width (' 08,', '123456', ' 123,456'), ('08,', '-123456', '-123,456'), ('+09,', '123456', '+0,123,456'), diff --git a/Misc/NEWS.d/next/Library/2025-06-02-14-36-28.gh-issue-130662.Gpr2GB.rst b/Misc/NEWS.d/next/Library/2025-06-02-14-36-28.gh-issue-130662.Gpr2GB.rst new file mode 100644 index 00000000000000..d97d937376a9d0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-02-14-36-28.gh-issue-130662.Gpr2GB.rst @@ -0,0 +1,3 @@ ++Accept leading zeros in precision and width fields for ++:class:`~decimal.Decimal` formatting, for example ``format(Decimal(1.25), +'.016f')``. From b6237c3602075294a18dec821773429a51fa7e0d Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 3 Jun 2025 02:34:26 +1200 Subject: [PATCH 452/989] gh-117852: fix argument checking of `async_generator.athrow` (#134868) Co-authored-by: Kumar Aditya --- Lib/test/test_asyncgen.py | 9 ++++ ...-05-28-23-58-50.gh-issue-117852.BO9g7z.rst | 1 + Objects/genobject.c | 49 +++++++++++-------- 3 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-28-23-58-50.gh-issue-117852.BO9g7z.rst diff --git a/Lib/test/test_asyncgen.py b/Lib/test/test_asyncgen.py index 2c44647bf3e2f9..636cb33dd9884a 100644 --- a/Lib/test/test_asyncgen.py +++ b/Lib/test/test_asyncgen.py @@ -2021,6 +2021,15 @@ async def gen(): g.athrow(RuntimeError) gc_collect() + def test_athrow_throws_immediately(self): + async def gen(): + yield 1 + + g = gen() + msg = "athrow expected at least 1 argument, got 0" + with self.assertRaisesRegex(TypeError, msg): + g.athrow() + def test_aclose(self): async def gen(): yield 1 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-28-23-58-50.gh-issue-117852.BO9g7z.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-28-23-58-50.gh-issue-117852.BO9g7z.rst new file mode 100644 index 00000000000000..fc71cd21a367b2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-28-23-58-50.gh-issue-117852.BO9g7z.rst @@ -0,0 +1 @@ +Fix argument checking of :meth:`~agen.athrow`. diff --git a/Objects/genobject.c b/Objects/genobject.c index 98b2c5004df8ac..da1462deaaa02c 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1451,7 +1451,9 @@ typedef struct PyAsyncGenAThrow { /* Can be NULL, when in the "aclose()" mode (equivalent of "athrow(GeneratorExit)") */ - PyObject *agt_args; + PyObject *agt_typ; + PyObject *agt_tb; + PyObject *agt_val; AwaitableState agt_state; } PyAsyncGenAThrow; @@ -2078,7 +2080,9 @@ async_gen_athrow_dealloc(PyObject *self) _PyObject_GC_UNTRACK(self); Py_CLEAR(agt->agt_gen); - Py_CLEAR(agt->agt_args); + Py_XDECREF(agt->agt_typ); + Py_XDECREF(agt->agt_tb); + Py_XDECREF(agt->agt_val); PyObject_GC_Del(self); } @@ -2088,7 +2092,9 @@ async_gen_athrow_traverse(PyObject *self, visitproc visit, void *arg) { PyAsyncGenAThrow *agt = _PyAsyncGenAThrow_CAST(self); Py_VISIT(agt->agt_gen); - Py_VISIT(agt->agt_args); + Py_VISIT(agt->agt_typ); + Py_VISIT(agt->agt_tb); + Py_VISIT(agt->agt_val); return 0; } @@ -2116,7 +2122,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) if (o->agt_state == AWAITABLE_STATE_INIT) { if (o->agt_gen->ag_running_async) { o->agt_state = AWAITABLE_STATE_CLOSED; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { PyErr_SetString( PyExc_RuntimeError, "aclose(): asynchronous generator is already running"); @@ -2143,7 +2149,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) o->agt_state = AWAITABLE_STATE_ITER; o->agt_gen->ag_running_async = 1; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { /* aclose() mode */ o->agt_gen->ag_closed = 1; @@ -2157,19 +2163,10 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) goto yield_close; } } else { - PyObject *typ; - PyObject *tb = NULL; - PyObject *val = NULL; - - if (!PyArg_UnpackTuple(o->agt_args, "athrow", 1, 3, - &typ, &val, &tb)) { - return NULL; - } - retval = _gen_throw((PyGenObject *)gen, 0, /* Do not close generator when PyExc_GeneratorExit is passed */ - typ, val, tb); + o->agt_typ, o->agt_val, o->agt_tb); retval = async_gen_unwrap_value(o->agt_gen, retval); } if (retval == NULL) { @@ -2181,7 +2178,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) assert(o->agt_state == AWAITABLE_STATE_ITER); retval = gen_send((PyObject *)gen, arg); - if (o->agt_args) { + if (o->agt_typ) { return async_gen_unwrap_value(o->agt_gen, retval); } else { /* aclose() mode */ @@ -2212,7 +2209,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) if (PyErr_ExceptionMatches(PyExc_StopAsyncIteration) || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { /* when aclose() is called we don't want to propagate StopAsyncIteration or GeneratorExit; just raise StopIteration, signalling that this 'aclose()' await @@ -2241,7 +2238,7 @@ async_gen_athrow_throw(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (o->agt_state == AWAITABLE_STATE_INIT) { if (o->agt_gen->ag_running_async) { o->agt_state = AWAITABLE_STATE_CLOSED; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { PyErr_SetString( PyExc_RuntimeError, "aclose(): asynchronous generator is already running"); @@ -2259,7 +2256,7 @@ async_gen_athrow_throw(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } PyObject *retval = gen_throw((PyObject*)o->agt_gen, args, nargs); - if (o->agt_args) { + if (o->agt_typ) { retval = async_gen_unwrap_value(o->agt_gen, retval); if (retval == NULL) { o->agt_gen->ag_running_async = 0; @@ -2334,7 +2331,7 @@ async_gen_athrow_finalize(PyObject *op) { PyAsyncGenAThrow *o = (PyAsyncGenAThrow*)op; if (o->agt_state == AWAITABLE_STATE_INIT) { - PyObject *method = o->agt_args ? &_Py_ID(athrow) : &_Py_ID(aclose); + PyObject *method = o->agt_typ ? &_Py_ID(athrow) : &_Py_ID(aclose); _PyErr_WarnUnawaitedAgenMethod(o->agt_gen, method); } } @@ -2403,13 +2400,23 @@ PyTypeObject _PyAsyncGenAThrow_Type = { static PyObject * async_gen_athrow_new(PyAsyncGenObject *gen, PyObject *args) { + PyObject *typ = NULL; + PyObject *tb = NULL; + PyObject *val = NULL; + if (args && !PyArg_UnpackTuple(args, "athrow", 1, 3, &typ, &val, &tb)) { + return NULL; + } + PyAsyncGenAThrow *o; o = PyObject_GC_New(PyAsyncGenAThrow, &_PyAsyncGenAThrow_Type); if (o == NULL) { return NULL; } o->agt_gen = (PyAsyncGenObject*)Py_NewRef(gen); - o->agt_args = Py_XNewRef(args); + o->agt_typ = Py_XNewRef(typ); + o->agt_tb = Py_XNewRef(tb); + o->agt_val = Py_XNewRef(val); + o->agt_state = AWAITABLE_STATE_INIT; _PyObject_GC_TRACK((PyObject*)o); return (PyObject*)o; From 26a1cd4e8c0c9ea1a6683abd82547ddee656ff3d Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 2 Jun 2025 16:43:32 +0200 Subject: [PATCH 453/989] gh-123471: make concurrent iteration over `itertools.cycle` safe under free-threading (#131212) Co-authored-by: Kumar Aditya --- ...itertools_batched.py => test_itertools.py} | 32 +++++++++++++++++-- ...-03-13-20-48-58.gh-issue-123471.cM4w4f.rst | 1 + Modules/itertoolsmodule.c | 27 +++++++++------- 3 files changed, 46 insertions(+), 14 deletions(-) rename Lib/test/test_free_threading/{test_itertools_batched.py => test_itertools.py} (53%) create mode 100644 Misc/NEWS.d/next/Library/2025-03-13-20-48-58.gh-issue-123471.cM4w4f.rst diff --git a/Lib/test/test_free_threading/test_itertools_batched.py b/Lib/test/test_free_threading/test_itertools.py similarity index 53% rename from Lib/test/test_free_threading/test_itertools_batched.py rename to Lib/test/test_free_threading/test_itertools.py index a754b4f9ea9902..8360afbf78cadd 100644 --- a/Lib/test/test_free_threading/test_itertools_batched.py +++ b/Lib/test/test_free_threading/test_itertools.py @@ -1,15 +1,15 @@ import unittest from threading import Thread, Barrier -from itertools import batched +from itertools import batched, cycle from test.support import threading_helper threading_helper.requires_working_threading(module=True) -class EnumerateThreading(unittest.TestCase): +class ItertoolsThreading(unittest.TestCase): @threading_helper.reap_threads - def test_threading(self): + def test_batched(self): number_of_threads = 10 number_of_iterations = 20 barrier = Barrier(number_of_threads) @@ -34,5 +34,31 @@ def work(it): barrier.reset() + @threading_helper.reap_threads + def test_cycle(self): + number_of_threads = 6 + number_of_iterations = 10 + number_of_cycles = 400 + + barrier = Barrier(number_of_threads) + def work(it): + barrier.wait() + for _ in range(number_of_cycles): + _ = next(it) + + data = (1, 2, 3, 4) + for it in range(number_of_iterations): + cycle_iterator = cycle(data) + worker_threads = [] + for ii in range(number_of_threads): + worker_threads.append( + Thread(target=work, args=[cycle_iterator])) + + with threading_helper.start_threads(worker_threads): + pass + + barrier.reset() + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-03-13-20-48-58.gh-issue-123471.cM4w4f.rst b/Misc/NEWS.d/next/Library/2025-03-13-20-48-58.gh-issue-123471.cM4w4f.rst new file mode 100644 index 00000000000000..cfc783900de70f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-13-20-48-58.gh-issue-123471.cM4w4f.rst @@ -0,0 +1 @@ +Make concurrent iterations over :class:`itertools.cycle` safe under free-threading. diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 943c1e8607b38f..2003546ce84cef 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -1124,7 +1124,6 @@ typedef struct { PyObject *it; PyObject *saved; Py_ssize_t index; - int firstpass; } cycleobject; #define cycleobject_CAST(op) ((cycleobject *)(op)) @@ -1165,8 +1164,7 @@ itertools_cycle_impl(PyTypeObject *type, PyObject *iterable) } lz->it = it; lz->saved = saved; - lz->index = 0; - lz->firstpass = 0; + lz->index = -1; return (PyObject *)lz; } @@ -1199,11 +1197,11 @@ cycle_next(PyObject *op) cycleobject *lz = cycleobject_CAST(op); PyObject *item; - if (lz->it != NULL) { + Py_ssize_t index = FT_ATOMIC_LOAD_SSIZE_RELAXED(lz->index); + + if (index < 0) { item = PyIter_Next(lz->it); if (item != NULL) { - if (lz->firstpass) - return item; if (PyList_Append(lz->saved, item)) { Py_DECREF(item); return NULL; @@ -1213,15 +1211,22 @@ cycle_next(PyObject *op) /* Note: StopIteration is already cleared by PyIter_Next() */ if (PyErr_Occurred()) return NULL; + index = 0; + FT_ATOMIC_STORE_SSIZE_RELAXED(lz->index, 0); +#ifndef Py_GIL_DISABLED Py_CLEAR(lz->it); +#endif } if (PyList_GET_SIZE(lz->saved) == 0) return NULL; - item = PyList_GET_ITEM(lz->saved, lz->index); - lz->index++; - if (lz->index >= PyList_GET_SIZE(lz->saved)) - lz->index = 0; - return Py_NewRef(item); + item = PyList_GetItemRef(lz->saved, index); + assert(item); + index++; + if (index >= PyList_GET_SIZE(lz->saved)) { + index = 0; + } + FT_ATOMIC_STORE_SSIZE_RELAXED(lz->index, index); + return item; } static PyType_Slot cycle_slots[] = { From 055827528fa50c9a7707792f5fe264c4e20c07e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 2 Jun 2025 16:57:08 +0200 Subject: [PATCH 454/989] gh-130999: Fix globals() poisoning in test_traceback (gh-135030) --- Lib/test/test_traceback.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 6b2271f5d5ba8d..74b979d009664d 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -4232,8 +4232,8 @@ def make_module(self, code): return mod_name - def get_import_from_suggestion(self, mod_dict, name): - modname = self.make_module(mod_dict) + def get_import_from_suggestion(self, code, name): + modname = self.make_module(code) def callable(): try: @@ -4416,8 +4416,9 @@ def func(): def test_name_error_suggestions_with_non_string_candidates(self): def func(): abc = 1 - globals()[0] = 1 - abv + custom_globals = globals().copy() + custom_globals[0] = 1 + print(eval("abv", custom_globals, locals())) actual = self.get_suggestion(func) self.assertIn("abc", actual) From 44fb7c361cb24dcf9989a7a1cfee4f6aad5c81aa Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 3 Jun 2025 05:22:41 +1200 Subject: [PATCH 455/989] gh-134908: Protect `textiowrapper_iternext` with critical section (gh-134910) The `textiowrapper_iternext` function called `_textiowrapper_writeflush`, but did not use a critical section, making it racy in free-threaded builds. --- Lib/test/test_io.py | 31 +++++++++++++++++++ ...-05-30-15-56-19.gh-issue-134908.3a7PxM.rst | 1 + Modules/_io/textio.c | 15 ++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 168e66c5a3f0e0..0c921ffbc2576a 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1062,6 +1062,37 @@ def flush(self): # Silence destructor error R.flush = lambda self: None + @threading_helper.requires_working_threading() + def test_write_readline_races(self): + # gh-134908: Concurrent iteration over a file caused races + thread_count = 2 + write_count = 100 + read_count = 100 + + def writer(file, barrier): + barrier.wait() + for _ in range(write_count): + file.write("x") + + def reader(file, barrier): + barrier.wait() + for _ in range(read_count): + for line in file: + self.assertEqual(line, "") + + with self.open(os_helper.TESTFN, "w+") as f: + barrier = threading.Barrier(thread_count + 1) + reader = threading.Thread(target=reader, args=(f, barrier)) + writers = [threading.Thread(target=writer, args=(f, barrier)) + for _ in range(thread_count)] + with threading_helper.catch_threading_exception() as cm: + with threading_helper.start_threads(writers + [reader]): + pass + self.assertIsNone(cm.exc_type) + + self.assertEqual(os.stat(os_helper.TESTFN).st_size, + write_count * thread_count) + class CIOTest(IOTest): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst new file mode 100644 index 00000000000000..3178f0aaf885f8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst @@ -0,0 +1 @@ +Fix crash when iterating over lines in a text file on the :term:`free threaded ` build. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86328e46a7b131..3808ecdceb9b70 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1578,6 +1578,8 @@ _io_TextIOWrapper_detach_impl(textio *self) static int _textiowrapper_writeflush(textio *self) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + if (self->pending_bytes == NULL) return 0; @@ -3173,8 +3175,9 @@ _io_TextIOWrapper_close_impl(textio *self) } static PyObject * -textiowrapper_iternext(PyObject *op) +textiowrapper_iternext_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyObject *line; textio *self = textio_CAST(op); @@ -3210,6 +3213,16 @@ textiowrapper_iternext(PyObject *op) return line; } +static PyObject * +textiowrapper_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = textiowrapper_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] @critical_section @getter From 0cec424af5904b3d23ad6e3c6d1a27f89d238d64 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 21:08:26 +0300 Subject: [PATCH 456/989] gh-66234: Add flag to disable the use of mmap in dbm.gnu (GH-135005) This may harm performance, but improve crash tolerance. --- Doc/library/dbm.rst | 3 +++ Doc/whatsnew/3.15.rst | 4 +++ Lib/test/test_dbm_gnu.py | 27 +++++++++++++++++-- ...5-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst | 3 +++ Modules/_gdbmmodule.c | 8 ++++++ 5 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 6f548fbb1b39d8..39e287b15214e4 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -254,6 +254,9 @@ functionality like crash tolerance. * ``'s'``: Synchronized mode. Changes to the database will be written immediately to the file. * ``'u'``: Do not lock database. + * ``'m'``: Do not use :manpage:`mmap(2)`. + This may harm performance, but improve crash tolerance. + .. versionadded:: next Not all flags are valid for all versions of GDBM. See the :data:`open_flags` member for a list of supported flag characters. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index a27a17afdba2a8..2fe33c4c535919 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -96,6 +96,10 @@ dbm which allow to recover unused free space previously occupied by deleted entries. (Contributed by Andrea Oliveri in :gh:`134004`.) +* Add the ``'m'`` flag for :func:`dbm.gnu.open` which allows to disable + the use of :manpage:`mmap(2)`. + This may harm performance, but improve crash tolerance. + (Contributed by Serhiy Storchaka in :gh:`66234`.) difflib ------- diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 66268c42a300b5..e0b988b7b95bbd 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -74,12 +74,12 @@ def test_flags(self): # Test the flag parameter open() by trying all supported flag modes. all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). - modes = all - set('fsu') + modes = all - set('fsum') for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() - # Test additional flags (presumably "fsu"). + # Test additional flags (presumably "fsum"). flags = all - set('crwn') for mode in modes: for flag in flags: @@ -217,6 +217,29 @@ def test_localized_error(self): create_empty_file(os.path.join(d, 'test')) self.assertRaises(gdbm.error, gdbm.open, filename, 'r') + @unittest.skipUnless('m' in gdbm.open_flags, "requires 'm' in open_flags") + def test_nommap_no_crash(self): + self.g = g = gdbm.open(filename, 'nm') + os.truncate(filename, 0) + + g.get(b'a', b'c') + g.keys() + g.firstkey() + g.nextkey(b'a') + with self.assertRaises(KeyError): + g[b'a'] + with self.assertRaises(gdbm.error): + len(g) + + with self.assertRaises(gdbm.error): + g[b'a'] = b'c' + with self.assertRaises(gdbm.error): + del g[b'a'] + with self.assertRaises(gdbm.error): + g.setdefault(b'a', b'c') + with self.assertRaises(gdbm.error): + g.reorganize() + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst b/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst new file mode 100644 index 00000000000000..1defb9a72e04e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst @@ -0,0 +1,3 @@ +Add the ``'m'`` flag for :func:`dbm.gnu.open` which allows to disable the +use of :manpage:`mmap(2)`. This may harm performance, but improve crash +tolerance. diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index 9c402e20e513b9..6a4939512b22fc 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -813,6 +813,11 @@ dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, case 'u': iflags |= GDBM_NOLOCK; break; +#endif +#ifdef GDBM_NOMMAP + case 'm': + iflags |= GDBM_NOMMAP; + break; #endif default: PyErr_Format(state->gdbm_error, @@ -846,6 +851,9 @@ static const char gdbmmodule_open_flags[] = "rwcn" #endif #ifdef GDBM_NOLOCK "u" +#endif +#ifdef GDBM_NOMMAP + "m" #endif ; From c7051a366996dd2213decb8677c52d1eda0fde6a Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 2 Jun 2025 15:13:08 -0300 Subject: [PATCH 457/989] Remove newline in Doc/c-api/lifecycle.rst for gettext builder (GH-135013) --- Doc/c-api/lifecycle.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst index 0e2ffc096caba7..5a170862a26f44 100644 --- a/Doc/c-api/lifecycle.rst +++ b/Doc/c-api/lifecycle.rst @@ -55,16 +55,14 @@ that must be true for *B* to occur after *A*. .. image:: lifecycle.dot.svg :align: center :class: invert-in-dark-mode - :alt: Diagram showing events in an object's life. Explained in detail - below. + :alt: Diagram showing events in an object's life. Explained in detail below. .. only:: latex .. image:: lifecycle.dot.pdf :align: center :class: invert-in-dark-mode - :alt: Diagram showing events in an object's life. Explained in detail - below. + :alt: Diagram showing events in an object's life. Explained in detail below. .. container:: :name: life-events-graph-description From 7a79f52d83c22f5a9787e590f267325c1175d389 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:25:32 +0300 Subject: [PATCH 458/989] gh-133454: Mark test_queue tests with many threads as bigmem (gh-134575) 50 producer and 50 consumer threads need more than 5GB of memory. --- Lib/test/test_queue.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index 7f4fe357034b71..c855fb8fe2b05a 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -6,7 +6,7 @@ import time import unittest import weakref -from test.support import gc_collect +from test.support import gc_collect, bigmemtest from test.support import import_helper from test.support import threading_helper @@ -963,33 +963,33 @@ def test_order(self): # One producer, one consumer => results appended in well-defined order self.assertEqual(results, inputs) - def test_many_threads(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads(self, size): # Test multiple concurrent put() and get() - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, self.feed, self.consume) + results = self.run_threads(size, q, inputs, self.feed, self.consume) # Multiple consumers without synchronization append the # results in random order self.assertEqual(sorted(results), inputs) - def test_many_threads_nonblock(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_nonblock(self, size): # Test multiple concurrent put() and get(block=False) - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_nonblock) self.assertEqual(sorted(results), inputs) - def test_many_threads_timeout(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_timeout(self, size): # Test multiple concurrent put() and get(timeout=...) - N = 50 q = self.q inputs = list(range(1000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_timeout) self.assertEqual(sorted(results), inputs) From e814f43f2c655b931af8ee9e1c128bd1027549fb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:31:06 +0300 Subject: [PATCH 459/989] gh-74232: Add a note about roundtrip of non-float numerics in CSV (GH-134963) --- Doc/library/csv.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 533cdf13974be6..5297be17bd708e 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -70,7 +70,7 @@ The :mod:`csv` module defines the following functions: section :ref:`csv-fmt-params`. Each row read from the csv file is returned as a list of strings. No - automatic data type conversion is performed unless the ``QUOTE_NONNUMERIC`` format + automatic data type conversion is performed unless the :data:`QUOTE_NONNUMERIC` format option is specified (in which case unquoted fields are transformed into floats). A short usage example:: @@ -331,8 +331,14 @@ The :mod:`csv` module defines the following constants: Instructs :class:`writer` objects to quote all non-numeric fields. - Instructs :class:`reader` objects to convert all non-quoted fields to type *float*. + Instructs :class:`reader` objects to convert all non-quoted fields to type :class:`float`. + .. note:: + Some numeric types, such as :class:`bool`, :class:`~fractions.Fraction`, + or :class:`~enum.IntEnum`, have a string representation that cannot be + converted to :class:`float`. + They cannot be read in the :data:`QUOTE_NONNUMERIC` and + :data:`QUOTE_STRINGS` modes. .. data:: QUOTE_NONE From df98a47a61a274eb7427c6201ddabec9ffd30b0a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:35:41 +0300 Subject: [PATCH 460/989] gh-132813: Improve error messages for incorrect types and values of csv.Dialog attributes (GH-133241) Make them similar to PyArg_Parse error messages, mention None as a possible value, show a wrong type and the string length. --- Lib/test/test_csv.py | 48 ++++++++---- ...-05-01-10-56-44.gh-issue-132813.rKurvp.rst | 2 + Modules/_csv.c | 73 ++++++++----------- 3 files changed, 69 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9aace57633b0c6..60feab225a107c 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1122,19 +1122,22 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.quotechar = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be string or None, not int') + '"quotechar" must be a unicode character or None, ' + 'not int') def test_delimiter(self): class mydialect(csv.Dialect): @@ -1151,31 +1154,32 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, ' + 'not a string of length 3') mydialect.delimiter = "" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, not a string of length 0') mydialect.delimiter = b"," with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not bytes') + '"delimiter" must be a unicode character, not bytes') mydialect.delimiter = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not int') + '"delimiter" must be a unicode character, not int') mydialect.delimiter = None with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not NoneType') + '"delimiter" must be a unicode character, not NoneType') def test_escapechar(self): class mydialect(csv.Dialect): @@ -1189,20 +1193,32 @@ class mydialect(csv.Dialect): self.assertEqual(d.escapechar, "\\") mydialect.escapechar = "" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.escapechar = "**" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.escapechar = b"*" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not bytes') mydialect.escapechar = 4 - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not int') def test_lineterminator(self): class mydialect(csv.Dialect): @@ -1223,7 +1239,13 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"lineterminator" must be a string') + '"lineterminator" must be a string, not int') + + mydialect.lineterminator = None + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"lineterminator" must be a string, not NoneType') def test_invalid_chars(self): def create_invalid(field_name, value, **kwargs): diff --git a/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst new file mode 100644 index 00000000000000..55608528a4564b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst @@ -0,0 +1,2 @@ +Improve error messages for incorrect types and values of :class:`csv.Dialect` +attributes. diff --git a/Modules/_csv.c b/Modules/_csv.c index e5ae853590bf2c..2e04136e0ac657 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -237,7 +237,7 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) int value; if (!PyLong_CheckExact(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an integer", name); + "\"%s\" must be an integer, not %T", name, src); return -1; } value = PyLong_AsInt(src); @@ -255,27 +255,29 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (src == NULL) { *target = dflt; } - else { + else if (src == Py_None) { *target = NOT_SET; - if (src != Py_None) { - if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; - } - Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); - return -1; - } - *target = PyUnicode_READ_CHAR(src, 0); + } + else { + // similar to PyArg_Parse("C?") + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, not %T", + name, src); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; } + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, " + "not a string of length %zd", + name, len); + return -1; + } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } @@ -287,11 +289,12 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { + // similar to PyArg_Parse("C") if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; + "\"%s\" must be a unicode character, not %T", + name, src); + return -1; } Py_ssize_t len = PyUnicode_GetLength(src); if (len < 0) { @@ -299,8 +302,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) } if (len != 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); + "\"%s\" must be a unicode character, " + "not a string of length %zd", + name, len); return -1; } *target = PyUnicode_READ_CHAR(src, 0); @@ -314,16 +318,12 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) if (src == NULL) *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { - if (src == Py_None) - *target = NULL; - else if (!PyUnicode_Check(src)) { + if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a string", name); + "\"%s\" must be a string, not %T", name, src); return -1; } - else { - Py_XSETREF(*target, Py_NewRef(src)); - } + Py_XSETREF(*target, Py_NewRef(src)); } return 0; } @@ -533,11 +533,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) /* validate options */ if (dialect_check_quoting(self->quoting)) goto err; - if (self->delimiter == NOT_SET) { - PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); - goto err; - } if (quotechar == Py_None && quoting == NULL) self->quoting = QUOTE_NONE; if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { @@ -545,10 +540,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "quotechar must be set if quoting enabled"); goto err; } - if (self->lineterminator == NULL) { - PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); - goto err; - } if (dialect_check_char("delimiter", self->delimiter, self, true) || dialect_check_char("escapechar", self->escapechar, self, !self->skipinitialspace) || From 0ac9e17fb47075c9446b99da4dffe4cad993b97a Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Mon, 2 Jun 2025 23:08:20 +0100 Subject: [PATCH 461/989] gh-134830: Fix reference in `Doc/extending/windows.rst` (GH-134831) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Miro Hrončok --- Doc/c-api/stable.rst | 1 + Doc/extending/windows.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst index 124e58cf950b7a..9b65e0b8d23d93 100644 --- a/Doc/c-api/stable.rst +++ b/Doc/c-api/stable.rst @@ -51,6 +51,7 @@ It is generally intended for specialized, low-level tools like debuggers. Projects that use this API are expected to follow CPython development and spend extra effort adjusting to changes. +.. _stable-application-binary-interface: Stable Application Binary Interface =================================== diff --git a/Doc/extending/windows.rst b/Doc/extending/windows.rst index 56aa44e4e58c83..a97c6182553c30 100644 --- a/Doc/extending/windows.rst +++ b/Doc/extending/windows.rst @@ -121,7 +121,7 @@ When creating DLLs in Windows, you can use the CPython library in two ways: :file:`Python.h` triggers an implicit, configure-aware link with the library. The header file chooses :file:`pythonXY_d.lib` for Debug, :file:`pythonXY.lib` for Release, and :file:`pythonX.lib` for Release with - the `Limited API `_ enabled. + the :ref:`Limited API ` enabled. To build two DLLs, spam and ni (which uses C functions found in spam), you could use these commands:: From b525e31b7fc50e7a498f8b9b16437cb7b9656f6f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 3 Jun 2025 08:40:40 +0900 Subject: [PATCH 462/989] gh-134875: Fix mimallc build error for the old compilers (gh-134994) --- Include/internal/mimalloc/mimalloc/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index 71b7ea702d6c5e..a7daa3a40a4c0b 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -634,10 +634,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST - next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed(&block->next), keys); + next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next), keys); #else MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)mi_atomic_load_relaxed(&block->next); + next = (mi_block_t*)mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; From 6e80f11eb5eba360334b4ace105eb7d73394baf7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 3 Jun 2025 08:40:45 +0200 Subject: [PATCH 463/989] gh-135028: Increase parser MAXSTACK for nested parenthesis (#135031) --- Lib/test/test_grammar.py | 14 +++++++++++++- Parser/parser.c | 2 +- Tools/peg_generator/pegen/c_generator.py | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index c39565144bf7f4..7f5d48b9c63ab7 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -1,7 +1,7 @@ # Python test set -- part 1, grammar. # This just tests whether the parser accepts them all. -from test.support import check_syntax_error +from test.support import check_syntax_error, skip_wasi_stack_overflow from test.support import import_helper import annotationlib import inspect @@ -249,6 +249,18 @@ def test_eof_error(self): compile(s, "", "exec") self.assertIn("was never closed", str(cm.exception)) + @skip_wasi_stack_overflow() + def test_max_level(self): + # Macro defined in Parser/lexer/state.h + MAXLEVEL = 200 + + result = eval("(" * MAXLEVEL + ")" * MAXLEVEL) + self.assertEqual(result, ()) + + with self.assertRaises(SyntaxError) as cm: + eval("(" * (MAXLEVEL + 1) + ")" * (MAXLEVEL + 1)) + self.assertStartsWith(str(cm.exception), 'too many nested parentheses') + var_annot_global: int # a global annotated is necessary for test_var_annot diff --git a/Parser/parser.c b/Parser/parser.c index 84a293cddffde6..d5aafef826ed3a 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14,7 +14,7 @@ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif static const int n_keyword_lists = 9; static KeywordToken *reserved_keywords[] = { diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 2be85a163b4043..09c5651f24a3bb 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -44,7 +44,7 @@ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif """ From 485b499610eefe362faf171f258b3a3588378ff1 Mon Sep 17 00:00:00 2001 From: stratakis Date: Tue, 3 Jun 2025 09:09:43 +0200 Subject: [PATCH 464/989] gh-128605: Add branch protections for x86_64 in asm_trampoline.S (#128606) Apply Intel Control-flow Technology for x86-64 on asm_trampoline.S. Required for mitigation against return-oriented programming (ROP) and Call or Jump Oriented Programming (COP/JOP) attacks. Manual application is required for the assembly files. See also: https://sourceware.org/annobin/annobin.html/Test-cf-protection.html --- Python/asm_trampoline.S | 22 ++++++++++++++++++++++ Python/perf_jit_trampoline.c | 4 ++++ 2 files changed, 26 insertions(+) diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 0a3265dfeee204..616752459ba4d9 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -9,6 +9,9 @@ # } _Py_trampoline_func_start: #ifdef __x86_64__ +#if defined(__CET__) && (__CET__ & 1) + endbr64 +#endif sub $8, %rsp call *%rcx add $8, %rsp @@ -34,3 +37,22 @@ _Py_trampoline_func_start: .globl _Py_trampoline_func_end _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits +# Note for indicating the assembly code supports CET +#if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: + .long 0x3 +3: + .align 8 +4: +#endif // __x86_64__ diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 5c7cb5b0a9913c..2ca18c23593547 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -869,7 +869,11 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { */ #ifdef __x86_64__ /* x86_64 calling convention unwinding rules */ +# if defined(__CET__) && (__CET__ & 1) + DWRF_U8(DWRF_CFA_advance_loc | 8); // Advance location by 8 bytes when CET protection is enabled +# else DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes +# endif DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset DWRF_UV(16); // New offset: SP + 16 DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes From ec12559ebafca01ded22c9013de64abe535c838d Mon Sep 17 00:00:00 2001 From: Roei Ben Artzi <155478676+roeibenartzi@users.noreply.github.com> Date: Tue, 3 Jun 2025 10:40:25 +0300 Subject: [PATCH 465/989] gh-131884: Fix incorrect formatting in json.dumps() when using indent and skipkeys=True (GH-132200) --- Lib/json/encoder.py | 5 +++-- Lib/test/test_json/test_dump.py | 8 ++++++++ .../2025-04-07-06-41-54.gh-issue-131884.ym9BJN.rst | 1 + Modules/_json.c | 13 ++++++++----- 4 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-07-06-41-54.gh-issue-131884.ym9BJN.rst diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index 016638549aa59b..bc446e0f377a11 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -348,7 +348,6 @@ def _iterencode_dict(dct, _current_indent_level): _current_indent_level += 1 newline_indent = '\n' + _indent * _current_indent_level item_separator = _item_separator + newline_indent - yield newline_indent else: newline_indent = None item_separator = _item_separator @@ -381,6 +380,8 @@ def _iterencode_dict(dct, _current_indent_level): f'not {key.__class__.__name__}') if first: first = False + if newline_indent is not None: + yield newline_indent else: yield item_separator yield _encoder(key) @@ -413,7 +414,7 @@ def _iterencode_dict(dct, _current_indent_level): except BaseException as exc: exc.add_note(f'when serializing {type(dct).__name__} item {key!r}') raise - if newline_indent is not None: + if not first and newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level yield '}' diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 13b40020781bae..39470754003bb6 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -22,6 +22,14 @@ def test_dump_skipkeys(self): self.assertIn('valid_key', o) self.assertNotIn(b'invalid_key', o) + def test_dump_skipkeys_indent_empty(self): + v = {b'invalid_key': False} + self.assertEqual(self.json.dumps(v, skipkeys=True, indent=4), '{}') + + def test_skipkeys_indent(self): + v = {b'invalid_key': False, 'valid_key': True} + self.assertEqual(self.json.dumps(v, skipkeys=True, indent=4), '{\n "valid_key": true\n}') + def test_encode_truefalse(self): self.assertEqual(self.dumps( {True: False, False: True}, sort_keys=True), diff --git a/Misc/NEWS.d/next/Library/2025-04-07-06-41-54.gh-issue-131884.ym9BJN.rst b/Misc/NEWS.d/next/Library/2025-04-07-06-41-54.gh-issue-131884.ym9BJN.rst new file mode 100644 index 00000000000000..d9e2eae02dce1f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-07-06-41-54.gh-issue-131884.ym9BJN.rst @@ -0,0 +1 @@ +Fix formatting issues in :func:`json.dump` when both *indent* and *skipkeys* are used. diff --git a/Modules/_json.c b/Modules/_json.c index 57678ad595f928..6b5f6ea42df4d1 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1603,6 +1603,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs if (*first) { *first = false; + if (s->indent != Py_None) { + if (write_newline_indent(writer, indent_level, indent_cache) < 0) { + Py_DECREF(keystr); + return -1; + } + } } else { if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { @@ -1670,11 +1676,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (s->indent != Py_None) { indent_level++; separator = get_item_separator(s, indent_level, indent_cache); - if (separator == NULL || - write_newline_indent(writer, indent_level, indent_cache) < 0) - { + if (separator == NULL) goto bail; - } } if (s->sort_keys || !PyDict_CheckExact(dct)) { @@ -1714,7 +1717,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, goto bail; Py_CLEAR(ident); } - if (s->indent != Py_None) { + if (s->indent != Py_None && !first) { indent_level--; if (write_newline_indent(writer, indent_level, indent_cache) < 0) { goto bail; From 3612d8f51741b11f36f8fb0494d79086bac9390a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 3 Jun 2025 12:42:11 +0200 Subject: [PATCH 466/989] gh-135034: Normalize link targets in tarfile, add `os.path.realpath(strict='allow_missing')` (#135037) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517. Signed-off-by: Łukasz Langa Co-authored-by: Petr Viktorin Co-authored-by: Seth Michael Larson Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Serhiy Storchaka --- Doc/library/os.path.rst | 32 +- Doc/library/tarfile.rst | 20 ++ Doc/whatsnew/3.15.rst | 32 ++ Lib/genericpath.py | 11 +- Lib/ntpath.py | 38 ++- Lib/posixpath.py | 57 ++-- Lib/tarfile.py | 163 +++++++-- Lib/test/test_ntpath.py | 214 ++++++++++-- Lib/test/test_posixpath.py | 252 +++++++++++--- Lib/test/test_tarfile.py | 310 +++++++++++++++++- ...-06-02-11-32-23.gh-issue-135034.RLGjbp.rst | 6 + 11 files changed, 966 insertions(+), 169 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index ecbbc1d7605f9f..f72aee19d8f332 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -408,9 +408,26 @@ the :mod:`glob` module.) system). On Windows, this function will also resolve MS-DOS (also called 8.3) style names such as ``C:\\PROGRA~1`` to ``C:\\Program Files``. - If a path doesn't exist or a symlink loop is encountered, and *strict* is - ``True``, :exc:`OSError` is raised. If *strict* is ``False`` these errors - are ignored, and so the result might be missing or otherwise inaccessible. + By default, the path is evaluated up to the first component that does not + exist, is a symlink loop, or whose evaluation raises :exc:`OSError`. + All such components are appended unchanged to the existing part of the path. + + Some errors that are handled this way include "access denied", "not a + directory", or "bad argument to internal function". Thus, the + resulting path may be missing or inaccessible, may still contain + links or loops, and may traverse non-directories. + + This behavior can be modified by keyword arguments: + + If *strict* is ``True``, the first error encountered when evaluating the path is + re-raised. + In particular, :exc:`FileNotFoundError` is raised if *path* does not exist, + or another :exc:`OSError` if it is otherwise inaccessible. + + If *strict* is :py:data:`os.path.ALLOW_MISSING`, errors other than + :exc:`FileNotFoundError` are re-raised (as with ``strict=True``). + Thus, the returned path will not contain any symbolic links, but the named + file and some of its parent directories may be missing. .. note:: This function emulates the operating system's procedure for making a path @@ -429,6 +446,15 @@ the :mod:`glob` module.) .. versionchanged:: 3.10 The *strict* parameter was added. + .. versionchanged:: next + The :py:data:`~os.path.ALLOW_MISSING` value for the *strict* parameter + was added. + +.. data:: ALLOW_MISSING + + Special value used for the *strict* argument in :func:`realpath`. + + .. versionadded:: next .. function:: relpath(path, start=os.curdir) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f9cb5495e60cd2..7cec108a5bd41d 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -255,6 +255,15 @@ The :mod:`tarfile` module defines the following exceptions: Raised to refuse extracting a symbolic link pointing outside the destination directory. +.. exception:: LinkFallbackError + + Raised to refuse emulating a link (hard or symbolic) by extracting another + archive member, when that member would be rejected by the filter location. + The exception that was raised to reject the replacement member is available + as :attr:`!BaseException.__context__`. + + .. versionadded:: next + The following constants are available at the module level: @@ -1068,6 +1077,12 @@ reused in custom filters: Implements the ``'data'`` filter. In addition to what ``tar_filter`` does: + - Normalize link targets (:attr:`TarInfo.linkname`) using + :func:`os.path.normpath`. + Note that this removes internal ``..`` components, which may change the + meaning of the link if the path in :attr:`!TarInfo.linkname` traverses + symbolic links. + - :ref:`Refuse ` to extract links (hard or soft) that link to absolute paths, or ones that link outside the destination. @@ -1099,6 +1114,10 @@ reused in custom filters: Note that this filter does not block *all* dangerous archive features. See :ref:`tarfile-further-verification` for details. + .. versionchanged:: next + + Link targets are now normalized. + .. _tarfile-extraction-refuse: @@ -1127,6 +1146,7 @@ Here is an incomplete list of things to consider: * Extract to a :func:`new temporary directory ` to prevent e.g. exploiting pre-existing links, and to make it easier to clean up after a failed extraction. +* Disallow symbolic links if you do not need the functionality. * When working with untrusted data, use external (e.g. OS-level) limits on disk, memory and CPU usage. * Check filenames against an allow-list of characters diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 2fe33c4c535919..daf3e8fb6c2c2b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -116,6 +116,16 @@ math (Contributed by Sergey B Kirpichev in :gh:`132908`.) +os.path +------- + +* The *strict* parameter to :func:`os.path.realpath` accepts a new value, + :data:`os.path.ALLOW_MISSING`. + If used, errors other than :exc:`FileNotFoundError` will be re-raised; + the resulting path can be missing but it will be free of symlinks. + (Contributed by Petr Viktorin for :cve:`2025-4517`.) + + shelve ------ @@ -132,6 +142,28 @@ ssl (Contributed by Will Childs-Klein in :gh:`133624`.) +tarfile +------- + +* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to + avoid path traversal attacks. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.) +* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes + when a directory was removed or replaced by another kind of file. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now (re-)apply the extraction filter when substituting a link (hard or + symbolic) with a copy of another archive member, and when fixing up + directory attributes. + The former raises a new exception, :exc:`~tarfile.LinkFallbackError`. + (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + no longer extract rejected members when + :func:`~tarfile.TarFile.errorlevel` is zero. + (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` + and :cve:`2025-4435`.) + + zlib ---- diff --git a/Lib/genericpath.py b/Lib/genericpath.py index ba7b0a13c7f81d..9363f564aab7a6 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -8,7 +8,7 @@ __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', 'getsize', 'isdevdrive', 'isdir', 'isfile', 'isjunction', 'islink', - 'lexists', 'samefile', 'sameopenfile', 'samestat'] + 'lexists', 'samefile', 'sameopenfile', 'samestat', 'ALLOW_MISSING'] # Does a path exist? @@ -189,3 +189,12 @@ def _check_arg_types(funcname, *args): f'os.PathLike object, not {s.__class__.__name__!r}') from None if hasstr and hasbytes: raise TypeError("Can't mix strings and bytes in path components") from None + +# A singleton with a true boolean value. +@object.__new__ +class ALLOW_MISSING: + """Special value for use in realpath().""" + def __repr__(self): + return 'os.path.ALLOW_MISSING' + def __reduce__(self): + return self.__class__.__name__ diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 52ff2af743af6c..9cdc16480f9afe 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -29,7 +29,7 @@ "abspath","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", "samefile", "sameopenfile", "samestat", "commonpath", "isjunction", - "isdevdrive"] + "isdevdrive", "ALLOW_MISSING"] def _get_bothseps(path): if isinstance(path, bytes): @@ -601,9 +601,10 @@ def abspath(path): from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink except ImportError: # realpath is a no-op on systems without _getfinalpathname support. - realpath = abspath + def realpath(path, *, strict=False): + return abspath(path) else: - def _readlink_deep(path): + def _readlink_deep(path, ignored_error=OSError): # These error codes indicate that we should stop reading links and # return the path we currently have. # 1: ERROR_INVALID_FUNCTION @@ -636,7 +637,7 @@ def _readlink_deep(path): path = old_path break path = normpath(join(dirname(old_path), path)) - except OSError as ex: + except ignored_error as ex: if ex.winerror in allowed_winerror: break raise @@ -645,7 +646,7 @@ def _readlink_deep(path): break return path - def _getfinalpathname_nonstrict(path): + def _getfinalpathname_nonstrict(path, ignored_error=OSError): # These error codes indicate that we should stop resolving the path # and return the value we currently have. # 1: ERROR_INVALID_FUNCTION @@ -673,17 +674,18 @@ def _getfinalpathname_nonstrict(path): try: path = _getfinalpathname(path) return join(path, tail) if tail else path - except OSError as ex: + except ignored_error as ex: if ex.winerror not in allowed_winerror: raise try: # The OS could not resolve this path fully, so we attempt # to follow the link ourselves. If we succeed, join the tail # and return. - new_path = _readlink_deep(path) + new_path = _readlink_deep(path, + ignored_error=ignored_error) if new_path != path: return join(new_path, tail) if tail else new_path - except OSError: + except ignored_error: # If we fail to readlink(), let's keep traversing pass # If we get these errors, try to get the real name of the file without accessing it. @@ -691,7 +693,7 @@ def _getfinalpathname_nonstrict(path): try: name = _findfirstfile(path) path, _ = split(path) - except OSError: + except ignored_error: path, name = split(path) else: path, name = split(path) @@ -721,6 +723,15 @@ def realpath(path, *, strict=False): if normcase(path) == devnull: return '\\\\.\\NUL' had_prefix = path.startswith(prefix) + + if strict is ALLOW_MISSING: + ignored_error = FileNotFoundError + strict = True + elif strict: + ignored_error = () + else: + ignored_error = OSError + if not had_prefix and not isabs(path): path = join(cwd, path) try: @@ -728,17 +739,16 @@ def realpath(path, *, strict=False): initial_winerror = 0 except ValueError as ex: # gh-106242: Raised for embedded null characters - # In strict mode, we convert into an OSError. + # In strict modes, we convert into an OSError. # Non-strict mode returns the path as-is, since we've already # made it absolute. if strict: raise OSError(str(ex)) from None path = normpath(path) - except OSError as ex: - if strict: - raise + except ignored_error as ex: initial_winerror = ex.winerror - path = _getfinalpathname_nonstrict(path) + path = _getfinalpathname_nonstrict(path, + ignored_error=ignored_error) # The path returned by _getfinalpathname will always start with \\?\ - # strip off that prefix unless it was already provided on the original # path. diff --git a/Lib/posixpath.py b/Lib/posixpath.py index db72ded8826056..d38f3bd5872bcd 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -36,7 +36,7 @@ "samefile","sameopenfile","samestat", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", "devnull","realpath","supports_unicode_filenames","relpath", - "commonpath", "isjunction","isdevdrive"] + "commonpath", "isjunction","isdevdrive","ALLOW_MISSING"] def _get_sep(path): @@ -402,10 +402,18 @@ def realpath(filename, *, strict=False): curdir = '.' pardir = '..' getcwd = os.getcwd - return _realpath(filename, strict, sep, curdir, pardir, getcwd) + if strict is ALLOW_MISSING: + ignored_error = FileNotFoundError + strict = True + elif strict: + ignored_error = () + else: + ignored_error = OSError + + lstat = os.lstat + readlink = os.readlink + maxlinks = None -def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir, - getcwd=os.getcwd, lstat=os.lstat, readlink=os.readlink, maxlinks=None): # The stack of unresolved path parts. When popped, a special value of None # indicates that a symlink target has been resolved, and that the original # symlink path can be retrieved by popping again. The [::-1] slice is a @@ -477,27 +485,28 @@ def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir, path = newpath continue target = readlink(newpath) - except OSError: - if strict: - raise - path = newpath + except ignored_error: + pass + else: + # Resolve the symbolic link + if target.startswith(sep): + # Symlink target is absolute; reset resolved path. + path = sep + if maxlinks is None: + # Mark this symlink as seen but not fully resolved. + seen[newpath] = None + # Push the symlink path onto the stack, and signal its specialness + # by also pushing None. When these entries are popped, we'll + # record the fully-resolved symlink target in the 'seen' mapping. + rest.append(newpath) + rest.append(None) + # Push the unresolved symlink target parts onto the stack. + target_parts = target.split(sep)[::-1] + rest.extend(target_parts) + part_count += len(target_parts) continue - # Resolve the symbolic link - if target.startswith(sep): - # Symlink target is absolute; reset resolved path. - path = sep - if maxlinks is None: - # Mark this symlink as seen but not fully resolved. - seen[newpath] = None - # Push the symlink path onto the stack, and signal its specialness - # by also pushing None. When these entries are popped, we'll - # record the fully-resolved symlink target in the 'seen' mapping. - rest.append(newpath) - rest.append(None) - # Push the unresolved symlink target parts onto the stack. - target_parts = target.split(sep)[::-1] - rest.extend(target_parts) - part_count += len(target_parts) + # An error occurred and was ignored. + path = newpath return path diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 212b71f6509740..068aa13ed70356 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -67,7 +67,7 @@ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", "tar_filter", "FilterError", "AbsoluteLinkError", "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", - "LinkOutsideDestinationError"] + "LinkOutsideDestinationError", "LinkFallbackError"] #--------------------------------------------------------- @@ -766,10 +766,22 @@ def __init__(self, tarinfo, path): super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' + 'which is outside the destination') +class LinkFallbackError(FilterError): + def __init__(self, tarinfo, path): + self.tarinfo = tarinfo + self._path = path + super().__init__(f'link {tarinfo.name!r} would be extracted as a ' + + f'copy of {path!r}, which was rejected') + +# Errors caused by filters -- both "fatal" and "non-fatal" -- that +# we consider to be issues with the argument, rather than a bug in the +# filter function +_FILTER_ERRORS = (FilterError, OSError, ExtractError) + def _get_filtered_attrs(member, dest_path, for_data=True): new_attrs = {} name = member.name - dest_path = os.path.realpath(dest_path) + dest_path = os.path.realpath(dest_path, strict=os.path.ALLOW_MISSING) # Strip leading / (tar's directory separator) from filenames. # Include os.sep (target OS directory separator) as well. if name.startswith(('/', os.sep)): @@ -779,7 +791,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True): # For example, 'C:/foo' on Windows. raise AbsolutePathError(member) # Ensure we stay in the destination - target_path = os.path.realpath(os.path.join(dest_path, name)) + target_path = os.path.realpath(os.path.join(dest_path, name), + strict=os.path.ALLOW_MISSING) if os.path.commonpath([target_path, dest_path]) != dest_path: raise OutsideDestinationError(member, target_path) # Limit permissions (no high bits, and go-w) @@ -817,6 +830,9 @@ def _get_filtered_attrs(member, dest_path, for_data=True): if member.islnk() or member.issym(): if os.path.isabs(member.linkname): raise AbsoluteLinkError(member) + normalized = os.path.normpath(member.linkname) + if normalized != member.linkname: + new_attrs['linkname'] = normalized if member.issym(): target_path = os.path.join(dest_path, os.path.dirname(name), @@ -824,7 +840,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True): else: target_path = os.path.join(dest_path, member.linkname) - target_path = os.path.realpath(target_path) + target_path = os.path.realpath(target_path, + strict=os.path.ALLOW_MISSING) if os.path.commonpath([target_path, dest_path]) != dest_path: raise LinkOutsideDestinationError(member, target_path) return new_attrs @@ -2386,30 +2403,58 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, members = self for member in members: - tarinfo = self._get_extract_tarinfo(member, filter_function, path) + tarinfo, unfiltered = self._get_extract_tarinfo( + member, filter_function, path) if tarinfo is None: continue if tarinfo.isdir(): # For directories, delay setting attributes until later, # since permissions can interfere with extraction and # extracting contents can reset mtime. - directories.append(tarinfo) + directories.append(unfiltered) self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), - numeric_owner=numeric_owner) + numeric_owner=numeric_owner, + filter_function=filter_function) # Reverse sort directories. directories.sort(key=lambda a: a.name, reverse=True) + # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) + for unfiltered in directories: try: + # Need to re-apply any filter, to take the *current* filesystem + # state into account. + try: + tarinfo = filter_function(unfiltered, path) + except _FILTER_ERRORS as exc: + self._log_no_directory_fixup(unfiltered, repr(exc)) + continue + if tarinfo is None: + self._log_no_directory_fixup(unfiltered, + 'excluded by filter') + continue + dirpath = os.path.join(path, tarinfo.name) + try: + lstat = os.lstat(dirpath) + except FileNotFoundError: + self._log_no_directory_fixup(tarinfo, 'missing') + continue + if not stat.S_ISDIR(lstat.st_mode): + # This is no longer a directory; presumably a later + # member overwrote the entry. + self._log_no_directory_fixup(tarinfo, 'not a directory') + continue self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: self._handle_nonfatal_error(e) + def _log_no_directory_fixup(self, member, reason): + self._dbg(2, "tarfile: Not fixing up directory %r (%s)" % + (member.name, reason)) + def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, filter=None): """Extract a member from the archive to the current working directory, @@ -2425,41 +2470,56 @@ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, String names of common filters are accepted. """ filter_function = self._get_filter_function(filter) - tarinfo = self._get_extract_tarinfo(member, filter_function, path) + tarinfo, unfiltered = self._get_extract_tarinfo( + member, filter_function, path) if tarinfo is not None: self._extract_one(tarinfo, path, set_attrs, numeric_owner) def _get_extract_tarinfo(self, member, filter_function, path): - """Get filtered TarInfo (or None) from member, which might be a str""" + """Get (filtered, unfiltered) TarInfos from *member* + + *member* might be a string. + + Return (None, None) if not found. + """ + if isinstance(member, str): - tarinfo = self.getmember(member) + unfiltered = self.getmember(member) else: - tarinfo = member + unfiltered = member - unfiltered = tarinfo + filtered = None try: - tarinfo = filter_function(tarinfo, path) + filtered = filter_function(unfiltered, path) except (OSError, UnicodeEncodeError, FilterError) as e: self._handle_fatal_error(e) except ExtractError as e: self._handle_nonfatal_error(e) - if tarinfo is None: + if filtered is None: self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) - return None + return None, None + # Prepare the link target for makelink(). - if tarinfo.islnk(): - tarinfo = copy.copy(tarinfo) - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - return tarinfo + if filtered.islnk(): + filtered = copy.copy(filtered) + filtered._link_target = os.path.join(path, filtered.linkname) + return filtered, unfiltered + + def _extract_one(self, tarinfo, path, set_attrs, numeric_owner, + filter_function=None): + """Extract from filtered tarinfo to disk. - def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): - """Extract from filtered tarinfo to disk""" + filter_function is only used when extracting a *different* + member (e.g. as fallback to creating a symlink) + """ self._check("r") try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), set_attrs=set_attrs, - numeric_owner=numeric_owner) + numeric_owner=numeric_owner, + filter_function=filter_function, + extraction_root=path) except (OSError, UnicodeEncodeError) as e: self._handle_fatal_error(e) except ExtractError as e: @@ -2517,9 +2577,13 @@ def extractfile(self, member): return None def _extract_member(self, tarinfo, targetpath, set_attrs=True, - numeric_owner=False): - """Extract the TarInfo object tarinfo to a physical + numeric_owner=False, *, filter_function=None, + extraction_root=None): + """Extract the filtered TarInfo object tarinfo to a physical file called targetpath. + + filter_function is only used when extracting a *different* + member (e.g. as fallback to creating a symlink) """ # Fetch the TarInfo object for the given name # and build the destination pathname, replacing @@ -2548,7 +2612,10 @@ def _extract_member(self, tarinfo, targetpath, set_attrs=True, elif tarinfo.ischr() or tarinfo.isblk(): self.makedev(tarinfo, targetpath) elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) + self.makelink_with_filter( + tarinfo, targetpath, + filter_function=filter_function, + extraction_root=extraction_root) elif tarinfo.type not in SUPPORTED_TYPES: self.makeunknown(tarinfo, targetpath) else: @@ -2631,10 +2698,18 @@ def makedev(self, tarinfo, targetpath): os.makedev(tarinfo.devmajor, tarinfo.devminor)) def makelink(self, tarinfo, targetpath): + return self.makelink_with_filter(tarinfo, targetpath, None, None) + + def makelink_with_filter(self, tarinfo, targetpath, + filter_function, extraction_root): """Make a (symbolic) link called targetpath. If it cannot be created (platform limitation), we try to make a copy of the referenced file instead of a link. + + filter_function is only used when extracting a *different* + member (e.g. as fallback to creating a link). """ + keyerror_to_extracterror = False try: # For systems that support symbolic and hard links. if tarinfo.issym(): @@ -2642,18 +2717,38 @@ def makelink(self, tarinfo, targetpath): # Avoid FileExistsError on following os.symlink. os.unlink(targetpath) os.symlink(tarinfo.linkname, targetpath) + return else: if os.path.exists(tarinfo._link_target): os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), - targetpath) + return except symlink_exception: + keyerror_to_extracterror = True + + try: + unfiltered = self._find_link_target(tarinfo) + except KeyError: + if keyerror_to_extracterror: + raise ExtractError( + "unable to resolve link inside archive") from None + else: + raise + + if filter_function is None: + filtered = unfiltered + else: + if extraction_root is None: + raise ExtractError( + "makelink_with_filter: if filter_function is not None, " + + "extraction_root must also not be None") try: - self._extract_member(self._find_link_target(tarinfo), - targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") from None + filtered = filter_function(unfiltered, extraction_root) + except _FILTER_ERRORS as cause: + raise LinkFallbackError(tarinfo, unfiltered.name) from cause + if filtered is not None: + self._extract_member(filtered, targetpath, + filter_function=filter_function, + extraction_root=extraction_root) def chown(self, tarinfo, targetpath, numeric_owner): """Set owner of targetpath according to tarinfo. If numeric_owner diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index c3b0bdaebc2329..b891d0734ca0a6 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -6,6 +6,7 @@ import sys import unittest import warnings +from ntpath import ALLOW_MISSING from test.support import TestFailed, cpython_only, os_helper from test.support.os_helper import FakePath from test import test_genericpath @@ -76,6 +77,27 @@ def tester(fn, wantResult): %(str(fn), str(wantResult), repr(gotResult))) +def _parameterize(*parameters): + """Simplistic decorator to parametrize a test + + Runs the decorated test multiple times in subTest, with a value from + 'parameters' passed as an extra positional argument. + Calls doCleanups() after each run. + + Not for general use. Intended to avoid indenting for easier backports. + + See https://discuss.python.org/t/91827 for discussing generalizations. + """ + def _parametrize_decorator(func): + def _parameterized(self, *args, **kwargs): + for parameter in parameters: + with self.subTest(parameter): + func(self, *args, parameter, **kwargs) + self.doCleanups() + return _parameterized + return _parametrize_decorator + + class NtpathTestCase(unittest.TestCase): def assertPathEqual(self, path1, path2): if path1 == path2 or _norm(path1) == _norm(path2): @@ -474,6 +496,27 @@ def test_realpath_curdir(self): tester("ntpath.realpath('.\\.')", expected) tester("ntpath.realpath('\\'.join(['.'] * 100))", expected) + def test_realpath_curdir_strict(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.', strict=True)", expected) + tester("ntpath.realpath('./.', strict=True)", expected) + tester("ntpath.realpath('/'.join(['.'] * 100), strict=True)", expected) + tester("ntpath.realpath('.\\.', strict=True)", expected) + tester("ntpath.realpath('\\'.join(['.'] * 100), strict=True)", expected) + + def test_realpath_curdir_missing_ok(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('./.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('/'.join(['.'] * 100), strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('.\\.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('\\'.join(['.'] * 100), strict=ALLOW_MISSING)", + expected) + def test_realpath_pardir(self): expected = ntpath.normpath(os.getcwd()) tester("ntpath.realpath('..')", ntpath.dirname(expected)) @@ -486,24 +529,59 @@ def test_realpath_pardir(self): tester("ntpath.realpath('\\'.join(['..'] * 50))", ntpath.splitdrive(expected)[0] + '\\') + def test_realpath_pardir_strict(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..', strict=True)", ntpath.dirname(expected)) + tester("ntpath.realpath('../..', strict=True)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50), strict=True)", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..', strict=True)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50), strict=True)", + ntpath.splitdrive(expected)[0] + '\\') + + def test_realpath_pardir_missing_ok(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..', strict=ALLOW_MISSING)", + ntpath.dirname(expected)) + tester("ntpath.realpath('../..', strict=ALLOW_MISSING)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50), strict=ALLOW_MISSING)", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..', strict=ALLOW_MISSING)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50), strict=ALLOW_MISSING)", + ntpath.splitdrive(expected)[0] + '\\') + @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_basic(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_basic(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) open(ABSTFN, "wb").close() self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN + "1") os.symlink(ABSTFN, ABSTFN + "1") - self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) - self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), + self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) + self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1"), **kwargs), os.fsencode(ABSTFN)) # gh-88013: call ntpath.realpath with binary drive name may raise a # TypeError. The drive should not exist to reproduce the bug. drives = {f"{c}:\\" for c in string.ascii_uppercase} - set(os.listdrives()) d = drives.pop().encode() - self.assertEqual(ntpath.realpath(d), d) + self.assertEqual(ntpath.realpath(d, strict=False), d) + + # gh-106242: Embedded nulls and non-strict fallback to abspath + if kwargs: + with self.assertRaises(OSError): + ntpath.realpath(os_helper.TESTFN + "\0spam", + **kwargs) + else: + self.assertEqual(ABSTFN + "\0spam", + ntpath.realpath(os_helper.TESTFN + "\0spam", **kwargs)) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @@ -526,51 +604,66 @@ def test_realpath_invalid_paths(self): self.assertEqual(realpath(path, strict=False), path) # gh-106242: Embedded nulls should raise OSError (not ValueError) self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) path = ABSTFNb + b'\x00' self.assertEqual(realpath(path, strict=False), path) self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) path = ABSTFN + '\\nonexistent\\x\x00' self.assertEqual(realpath(path, strict=False), path) self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) path = ABSTFNb + b'\\nonexistent\\x\x00' self.assertEqual(realpath(path, strict=False), path) self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) path = ABSTFN + '\x00\\..' self.assertEqual(realpath(path, strict=False), os.getcwd()) self.assertEqual(realpath(path, strict=True), os.getcwd()) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwd()) path = ABSTFNb + b'\x00\\..' self.assertEqual(realpath(path, strict=False), os.getcwdb()) self.assertEqual(realpath(path, strict=True), os.getcwdb()) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwdb()) path = ABSTFN + '\\nonexistent\\x\x00\\..' self.assertEqual(realpath(path, strict=False), ABSTFN + '\\nonexistent') self.assertRaises(OSError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFN + '\\nonexistent') path = ABSTFNb + b'\\nonexistent\\x\x00\\..' self.assertEqual(realpath(path, strict=False), ABSTFNb + b'\\nonexistent') self.assertRaises(OSError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFNb + b'\\nonexistent') + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_invalid_unicode_paths(self, kwargs): + realpath = ntpath.realpath + ABSTFN = ntpath.abspath(os_helper.TESTFN) + ABSTFNb = os.fsencode(ABSTFN) path = ABSTFNb + b'\xff' - self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) - self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) path = ABSTFNb + b'\\nonexistent\\\xff' - self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) - self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) path = ABSTFNb + b'\xff\\..' - self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) - self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) path = ABSTFNb + b'\\nonexistent\\\xff\\..' - self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) - self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_relative(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_relative(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) open(ABSTFN, "wb").close() self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN + "1") os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1")) - self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @@ -722,7 +815,62 @@ def test_realpath_symlink_loops_strict(self): @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_symlink_prefix(self): + def test_realpath_symlink_loops_raise(self): + # Symlink loops raise OSError in ALLOW_MISSING mode + ABSTFN = ntpath.abspath(os_helper.TESTFN) + self.addCleanup(os_helper.unlink, ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "1") + self.addCleanup(os_helper.unlink, ABSTFN + "2") + self.addCleanup(os_helper.unlink, ABSTFN + "y") + self.addCleanup(os_helper.unlink, ABSTFN + "c") + self.addCleanup(os_helper.unlink, ABSTFN + "a") + self.addCleanup(os_helper.unlink, ABSTFN + "x") + + os.symlink(ABSTFN, ABSTFN) + self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=ALLOW_MISSING) + + os.symlink(ABSTFN + "1", ABSTFN + "2") + os.symlink(ABSTFN + "2", ABSTFN + "1") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", + strict=ALLOW_MISSING) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", + strict=ALLOW_MISSING) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x", + strict=ALLOW_MISSING) + + # Windows eliminates '..' components before resolving links; + # realpath is not expected to raise if this removes the loop. + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\.."), + ntpath.dirname(ABSTFN)) + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\x"), + ntpath.dirname(ABSTFN) + "\\x") + + os.symlink(ABSTFN + "x", ABSTFN + "y") + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "y"), + ABSTFN + "x") + self.assertRaises( + OSError, ntpath.realpath, + ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1", + strict=ALLOW_MISSING) + + os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", + strict=ALLOW_MISSING) + + os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) + + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", + strict=ALLOW_MISSING) + + # Test using relative path as well. + self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN), + strict=ALLOW_MISSING) + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_symlink_prefix(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) self.addCleanup(os_helper.unlink, ABSTFN + "3") self.addCleanup(os_helper.unlink, "\\\\?\\" + ABSTFN + "3.") @@ -737,9 +885,9 @@ def test_realpath_symlink_prefix(self): f.write(b'1') os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link") - self.assertPathEqual(ntpath.realpath(ABSTFN + "3link"), + self.assertPathEqual(ntpath.realpath(ABSTFN + "3link", **kwargs), ABSTFN + "3") - self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link"), + self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link", **kwargs), "\\\\?\\" + ABSTFN + "3.") # Resolved paths should be usable to open target files @@ -749,14 +897,17 @@ def test_realpath_symlink_prefix(self): self.assertEqual(f.read(), b'1') # When the prefix is included, it is not stripped - self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"), + self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link", **kwargs), "\\\\?\\" + ABSTFN + "3") - self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"), + self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link", **kwargs), "\\\\?\\" + ABSTFN + "3.") @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_nul(self): tester("ntpath.realpath('NUL')", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=False)", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=True)", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=ALLOW_MISSING)", r'\\.\NUL') @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @unittest.skipUnless(HAVE_GETSHORTPATHNAME, 'need _getshortpathname') @@ -780,12 +931,20 @@ def test_realpath_cwd(self): self.assertPathEqual(test_file_long, ntpath.realpath(test_file_short)) - with os_helper.change_cwd(test_dir_long): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) - with os_helper.change_cwd(test_dir_long.lower()): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) - with os_helper.change_cwd(test_dir_short): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) + for kwargs in {}, {'strict': True}, {'strict': ALLOW_MISSING}: + with self.subTest(**kwargs): + with os_helper.change_cwd(test_dir_long): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) + with os_helper.change_cwd(test_dir_long.lower()): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) + with os_helper.change_cwd(test_dir_short): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_permission(self): @@ -806,12 +965,15 @@ def test_realpath_permission(self): # Automatic generation of short names may be disabled on # NTFS volumes for the sake of performance. # They're not supported at all on ReFS and exFAT. - subprocess.run( + p = subprocess.run( # Try to set the short name manually. ['fsutil.exe', 'file', 'setShortName', test_file, 'LONGFI~1.TXT'], creationflags=subprocess.DETACHED_PROCESS ) + if p.returncode: + raise unittest.SkipTest('failed to set short name') + try: self.assertPathEqual(test_file, ntpath.realpath(test_file_short)) except AssertionError: diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index f3f9895f529470..c45ce6d3ef7820 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -4,7 +4,8 @@ import random import sys import unittest -from posixpath import realpath, abspath, dirname, basename +from functools import partial +from posixpath import realpath, abspath, dirname, basename, ALLOW_MISSING from test import support from test import test_genericpath from test.support import import_helper @@ -33,6 +34,27 @@ def skip_if_ABSTFN_contains_backslash(test): msg = "ABSTFN is not a posix path - tests fail" return [test, unittest.skip(msg)(test)][found_backslash] + +def _parameterize(*parameters): + """Simplistic decorator to parametrize a test + + Runs the decorated test multiple times in subTest, with a value from + 'parameters' passed as an extra positional argument. + Does *not* call doCleanups() after each run. + + Not for general use. Intended to avoid indenting for easier backports. + + See https://discuss.python.org/t/91827 for discussing generalizations. + """ + def _parametrize_decorator(func): + def _parameterized(self, *args, **kwargs): + for parameter in parameters: + with self.subTest(parameter): + func(self, *args, parameter, **kwargs) + return _parameterized + return _parametrize_decorator + + class PosixPathTest(unittest.TestCase): def setUp(self): @@ -442,32 +464,35 @@ def test_normpath(self): self.assertEqual(result, expected) @skip_if_ABSTFN_contains_backslash - def test_realpath_curdir(self): - self.assertEqual(realpath('.'), os.getcwd()) - self.assertEqual(realpath('./.'), os.getcwd()) - self.assertEqual(realpath('/'.join(['.'] * 100)), os.getcwd()) + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_curdir(self, kwargs): + self.assertEqual(realpath('.', **kwargs), os.getcwd()) + self.assertEqual(realpath('./.', **kwargs), os.getcwd()) + self.assertEqual(realpath('/'.join(['.'] * 100), **kwargs), os.getcwd()) - self.assertEqual(realpath(b'.'), os.getcwdb()) - self.assertEqual(realpath(b'./.'), os.getcwdb()) - self.assertEqual(realpath(b'/'.join([b'.'] * 100)), os.getcwdb()) + self.assertEqual(realpath(b'.', **kwargs), os.getcwdb()) + self.assertEqual(realpath(b'./.', **kwargs), os.getcwdb()) + self.assertEqual(realpath(b'/'.join([b'.'] * 100), **kwargs), os.getcwdb()) @skip_if_ABSTFN_contains_backslash - def test_realpath_pardir(self): - self.assertEqual(realpath('..'), dirname(os.getcwd())) - self.assertEqual(realpath('../..'), dirname(dirname(os.getcwd()))) - self.assertEqual(realpath('/'.join(['..'] * 100)), '/') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_pardir(self, kwargs): + self.assertEqual(realpath('..', **kwargs), dirname(os.getcwd())) + self.assertEqual(realpath('../..', **kwargs), dirname(dirname(os.getcwd()))) + self.assertEqual(realpath('/'.join(['..'] * 100), **kwargs), '/') - self.assertEqual(realpath(b'..'), dirname(os.getcwdb())) - self.assertEqual(realpath(b'../..'), dirname(dirname(os.getcwdb()))) - self.assertEqual(realpath(b'/'.join([b'..'] * 100)), b'/') + self.assertEqual(realpath(b'..', **kwargs), dirname(os.getcwdb())) + self.assertEqual(realpath(b'../..', **kwargs), dirname(dirname(os.getcwdb()))) + self.assertEqual(realpath(b'/'.join([b'..'] * 100), **kwargs), b'/') @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_basic(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_basic(self, kwargs): # Basic operation. try: os.symlink(ABSTFN+"1", ABSTFN) - self.assertEqual(realpath(ABSTFN), ABSTFN+"1") + self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") finally: os_helper.unlink(ABSTFN) @@ -487,90 +512,115 @@ def test_realpath_invalid_paths(self): path = '/\x00' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = b'/\x00' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = '/nonexistent/x\x00' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = b'/nonexistent/x\x00' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = '/\x00/..' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = b'/\x00/..' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = '/nonexistent/x\x00/..' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = b'/nonexistent/x\x00/..' self.assertRaises(ValueError, realpath, path, strict=False) self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) path = '/\udfff' if sys.platform == 'win32': self.assertEqual(realpath(path, strict=False), path) self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) else: self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) path = '/nonexistent/\udfff' if sys.platform == 'win32': self.assertEqual(realpath(path, strict=False), path) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) else: self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) self.assertRaises(FileNotFoundError, realpath, path, strict=True) path = '/\udfff/..' if sys.platform == 'win32': self.assertEqual(realpath(path, strict=False), '/') self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/') else: self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) path = '/nonexistent/\udfff/..' if sys.platform == 'win32': self.assertEqual(realpath(path, strict=False), '/nonexistent') + self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/nonexistent') else: self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) self.assertRaises(FileNotFoundError, realpath, path, strict=True) path = b'/\xff' if sys.platform == 'win32': self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) else: self.assertEqual(realpath(path, strict=False), path) if support.is_wasi: self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) else: self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) path = b'/nonexistent/\xff' if sys.platform == 'win32': self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) else: self.assertEqual(realpath(path, strict=False), path) if support.is_wasi: self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) else: self.assertRaises(FileNotFoundError, realpath, path, strict=True) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_relative(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_relative(self, kwargs): try: os.symlink(posixpath.relpath(ABSTFN+"1"), ABSTFN) - self.assertEqual(realpath(ABSTFN), ABSTFN+"1") + self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") finally: os_helper.unlink(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_missing_pardir(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_missing_pardir(self, kwargs): try: os.symlink(TESTFN + "1", TESTFN) - self.assertEqual(realpath("nonexistent/../" + TESTFN), ABSTFN + "1") + self.assertEqual( + realpath("nonexistent/../" + TESTFN, **kwargs), ABSTFN + "1") finally: os_helper.unlink(TESTFN) @@ -617,37 +667,38 @@ def test_realpath_symlink_loops(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_symlink_loops_strict(self): + @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_symlink_loops_strict(self, kwargs): # Bug #43757, raise OSError if we get into an infinite symlink loop in - # strict mode. + # the strict modes. try: os.symlink(ABSTFN, ABSTFN) - self.assertRaises(OSError, realpath, ABSTFN, strict=True) + self.assertRaises(OSError, realpath, ABSTFN, **kwargs) os.symlink(ABSTFN+"1", ABSTFN+"2") os.symlink(ABSTFN+"2", ABSTFN+"1") - self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"2", **kwargs) - self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1/x", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"1/..", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"1/../x", **kwargs) os.symlink(ABSTFN+"x", ABSTFN+"y") self.assertRaises(OSError, realpath, - ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True) + ABSTFN+"1/../" + basename(ABSTFN) + "y", **kwargs) self.assertRaises(OSError, realpath, - ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True) + ABSTFN+"1/../" + basename(ABSTFN) + "1", **kwargs) os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a") - self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"a", **kwargs) os.symlink("../" + basename(dirname(ABSTFN)) + "/" + basename(ABSTFN) + "c", ABSTFN+"c") - self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"c", **kwargs) # Test using relative path as well. with os_helper.change_cwd(dirname(ABSTFN)): - self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True) + self.assertRaises(OSError, realpath, basename(ABSTFN), **kwargs) finally: os_helper.unlink(ABSTFN) os_helper.unlink(ABSTFN+"1") @@ -658,13 +709,14 @@ def test_realpath_symlink_loops_strict(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_repeated_indirect_symlinks(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_repeated_indirect_symlinks(self, kwargs): # Issue #6975. try: os.mkdir(ABSTFN) os.symlink('../' + basename(ABSTFN), ABSTFN + '/self') os.symlink('self/self/self', ABSTFN + '/link') - self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN) + self.assertEqual(realpath(ABSTFN + '/link', **kwargs), ABSTFN) finally: os_helper.unlink(ABSTFN + '/self') os_helper.unlink(ABSTFN + '/link') @@ -672,14 +724,15 @@ def test_realpath_repeated_indirect_symlinks(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_deep_recursion(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_deep_recursion(self, kwargs): depth = 10 try: os.mkdir(ABSTFN) for i in range(depth): os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1)) os.symlink('.', ABSTFN + '/0') - self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN) + self.assertEqual(realpath(ABSTFN + '/%d' % depth, **kwargs), ABSTFN) # Test using relative path as well. with os_helper.change_cwd(ABSTFN): @@ -691,7 +744,8 @@ def test_realpath_deep_recursion(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_parents(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_parents(self, kwargs): # We also need to resolve any symlinks in the parents of a relative # path passed to realpath. E.g.: current working directory is # /usr/doc with 'doc' being a symlink to /usr/share/doc. We call @@ -702,7 +756,8 @@ def test_realpath_resolve_parents(self): os.symlink(ABSTFN + "/y", ABSTFN + "/k") with os_helper.change_cwd(ABSTFN + "/k"): - self.assertEqual(realpath("a"), ABSTFN + "/y/a") + self.assertEqual(realpath("a", **kwargs), + ABSTFN + "/y/a") finally: os_helper.unlink(ABSTFN + "/k") os_helper.rmdir(ABSTFN + "/y") @@ -710,7 +765,8 @@ def test_realpath_resolve_parents(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_before_normalizing(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_before_normalizing(self, kwargs): # Bug #990669: Symbolic links should be resolved before we # normalize the path. E.g.: if we have directories 'a', 'k' and 'y' # in the following hierarchy: @@ -725,10 +781,10 @@ def test_realpath_resolve_before_normalizing(self): os.symlink(ABSTFN + "/k/y", ABSTFN + "/link-y") # Absolute path. - self.assertEqual(realpath(ABSTFN + "/link-y/.."), ABSTFN + "/k") + self.assertEqual(realpath(ABSTFN + "/link-y/..", **kwargs), ABSTFN + "/k") # Relative path. with os_helper.change_cwd(dirname(ABSTFN)): - self.assertEqual(realpath(basename(ABSTFN) + "/link-y/.."), + self.assertEqual(realpath(basename(ABSTFN) + "/link-y/..", **kwargs), ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "/link-y") @@ -738,7 +794,8 @@ def test_realpath_resolve_before_normalizing(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_first(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_first(self, kwargs): # Bug #1213894: The first component of the path, if not absolute, # must be resolved too. @@ -748,8 +805,8 @@ def test_realpath_resolve_first(self): os.symlink(ABSTFN, ABSTFN + "link") with os_helper.change_cwd(dirname(ABSTFN)): base = basename(ABSTFN) - self.assertEqual(realpath(base + "link"), ABSTFN) - self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k") + self.assertEqual(realpath(base + "link", **kwargs), ABSTFN) + self.assertEqual(realpath(base + "link/k", **kwargs), ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "link") os_helper.rmdir(ABSTFN + "/k") @@ -767,12 +824,67 @@ def test_realpath_unreadable_symlink(self): self.assertEqual(realpath(ABSTFN + '/foo'), ABSTFN + '/foo') self.assertEqual(realpath(ABSTFN + '/../foo'), dirname(ABSTFN) + '/foo') self.assertEqual(realpath(ABSTFN + '/foo/..'), ABSTFN) - with self.assertRaises(PermissionError): - realpath(ABSTFN, strict=True) finally: os.chmod(ABSTFN, 0o755, follow_symlinks=False) os_helper.unlink(ABSTFN) + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash + @unittest.skipIf(os.chmod not in os.supports_follow_symlinks, "Can't set symlink permissions") + @unittest.skipIf(sys.platform != "darwin", "only macOS requires read permission to readlink()") + @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_unreadable_symlink_strict(self, kwargs): + try: + os.symlink(ABSTFN+"1", ABSTFN) + os.chmod(ABSTFN, 0o000, follow_symlinks=False) + with self.assertRaises(PermissionError): + realpath(ABSTFN, **kwargs) + with self.assertRaises(PermissionError): + realpath(ABSTFN + '/foo', **kwargs), + with self.assertRaises(PermissionError): + realpath(ABSTFN + '/../foo', **kwargs) + with self.assertRaises(PermissionError): + realpath(ABSTFN + '/foo/..', **kwargs) + finally: + os.chmod(ABSTFN, 0o755, follow_symlinks=False) + os.unlink(ABSTFN) + + @skip_if_ABSTFN_contains_backslash + @os_helper.skip_unless_symlink + def test_realpath_unreadable_directory(self): + try: + os.mkdir(ABSTFN) + os.mkdir(ABSTFN + '/k') + os.chmod(ABSTFN, 0o000) + self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN) + + try: + os.stat(ABSTFN) + except PermissionError: + pass + else: + self.skipTest('Cannot block permissions') + + self.assertEqual(realpath(ABSTFN + '/k', strict=False), + ABSTFN + '/k') + self.assertRaises(PermissionError, realpath, ABSTFN + '/k', + strict=True) + self.assertRaises(PermissionError, realpath, ABSTFN + '/k', + strict=ALLOW_MISSING) + + self.assertEqual(realpath(ABSTFN + '/missing', strict=False), + ABSTFN + '/missing') + self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', + strict=True) + self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', + strict=ALLOW_MISSING) + finally: + os.chmod(ABSTFN, 0o755) + os_helper.rmdir(ABSTFN + '/k') + os_helper.rmdir(ABSTFN) + @skip_if_ABSTFN_contains_backslash def test_realpath_nonterminal_file(self): try: @@ -780,14 +892,27 @@ def test_realpath_nonterminal_file(self): f.write('test_posixpath wuz ere') self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN) self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN) + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) @@ -800,14 +925,27 @@ def test_realpath_nonterminal_symlink_to_file(self): os.symlink(ABSTFN + "1", ABSTFN) self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN + "1") self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "1") + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN + "1") + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN + "1") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN + "1") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "1/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) os_helper.unlink(ABSTFN + "1") @@ -822,14 +960,27 @@ def test_realpath_nonterminal_symlink_to_symlinks_to_file(self): os.symlink(ABSTFN + "1", ABSTFN) self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN + "2") self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "2") + self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "2") + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN + "2") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN + "2") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "2/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) os_helper.unlink(ABSTFN + "1") @@ -1017,9 +1168,12 @@ def test_path_normpath(self): def test_path_abspath(self): self.assertPathEqual(self.path.abspath) - def test_path_realpath(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_path_realpath(self, kwargs): self.assertPathEqual(self.path.realpath) + self.assertPathEqual(partial(self.path.realpath, **kwargs)) + def test_path_relpath(self): self.assertPathEqual(self.path.relpath) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index cf218a2bf14369..7055e1ed147a9e 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2715,6 +2715,31 @@ def test_useful_error_message_when_modules_missing(self): str(excinfo.exception), ) + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") + @unittest.mock.patch('os.chmod') + def test_deferred_directory_attributes_update(self, mock_chmod): + # Regression test for gh-127987: setting attributes on arbitrary files + tempdir = os.path.join(TEMPDIR, 'test127987') + def mock_chmod_side_effect(path, mode, **kwargs): + target_path = os.path.realpath(path) + if os.path.commonpath([target_path, tempdir]) != tempdir: + raise Exception("should not try to chmod anything outside the destination", target_path) + mock_chmod.side_effect = mock_chmod_side_effect + + outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') + with ArchiveMaker() as arc: + arc.add('x', symlink_to='.') + arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') + arc.add('x', symlink_to=outside_tree_dir) + + os.makedirs(outside_tree_dir) + try: + arc.open().extractall(path=tempdir, filter='tar') + finally: + os_helper.rmtree(outside_tree_dir) + os_helper.rmtree(tempdir) + class CommandLineTest(unittest.TestCase): @@ -3275,6 +3300,10 @@ def check_files_present(self, directory): got_paths = set( p.relative_to(directory) for p in pathlib.Path(directory).glob('**/*')) + if self.extraction_filter in (None, 'data'): + # The 'data' filter is expected to reject special files + for path in 'ustar/fifotype', 'ustar/blktype', 'ustar/chrtype': + got_paths.discard(pathlib.Path(path)) self.assertEqual(self.control_paths, got_paths) @contextmanager @@ -3504,12 +3533,28 @@ def __exit__(self, *exc): self.bio = None def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, - mode=None, size=None, **kwargs): - """Add a member to the test archive. Call within `with`.""" + mode=None, size=None, content=None, **kwargs): + """Add a member to the test archive. Call within `with`. + + Provides many shortcuts: + - default `type` is based on symlink_to, hardlink_to, and trailing `/` + in name (which is stripped) + - size & content defaults are based on each other + - content can be str or bytes + - mode should be textual ('-rwxrwxrwx') + + (add more! this is unstable internal test-only API) + """ name = str(name) tarinfo = tarfile.TarInfo(name).replace(**kwargs) + if content is not None: + if isinstance(content, str): + content = content.encode() + size = len(content) if size is not None: tarinfo.size = size + if content is None: + content = bytes(tarinfo.size) if mode: tarinfo.mode = _filemode_to_int(mode) if symlink_to is not None: @@ -3523,7 +3568,7 @@ def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, if type is not None: tarinfo.type = type if tarinfo.isreg(): - fileobj = io.BytesIO(bytes(tarinfo.size)) + fileobj = io.BytesIO(content) else: fileobj = None self.tar_w.addfile(tarinfo, fileobj) @@ -3557,7 +3602,7 @@ class TestExtractionFilters(unittest.TestCase): destdir = outerdir / 'dest' @contextmanager - def check_context(self, tar, filter): + def check_context(self, tar, filter, *, check_flag=True): """Extracts `tar` to `self.destdir` and allows checking the result If an error occurs, it must be checked using `expect_exception` @@ -3566,27 +3611,40 @@ def check_context(self, tar, filter): except the destination directory itself and parent directories of other files. When checking directories, do so before their contents. + + A file called 'flag' is made in outerdir (i.e. outside destdir) + before extraction; it should not be altered nor should its contents + be read/copied. """ with os_helper.temp_dir(self.outerdir): + flag_path = self.outerdir / 'flag' + flag_path.write_text('capture me') try: tar.extractall(self.destdir, filter=filter) except Exception as exc: self.raised_exception = exc + self.reraise_exception = True self.expected_paths = set() else: self.raised_exception = None + self.reraise_exception = False self.expected_paths = set(self.outerdir.glob('**/*')) self.expected_paths.discard(self.destdir) + self.expected_paths.discard(flag_path) try: - yield + yield self finally: tar.close() - if self.raised_exception: + if self.reraise_exception: raise self.raised_exception self.assertEqual(self.expected_paths, set()) + if check_flag: + self.assertEqual(flag_path.read_text(), 'capture me') + else: + assert filter == 'fully_trusted' def expect_file(self, name, type=None, symlink_to=None, mode=None, - size=None): + size=None, content=None): """Check a single file. See check_context.""" if self.raised_exception: raise self.raised_exception @@ -3605,26 +3663,45 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None, # The symlink might be the same (textually) as what we expect, # but some systems change the link to an equivalent path, so # we fall back to samefile(). - if expected != got: - self.assertTrue(got.samefile(expected)) + try: + if expected != got: + self.assertTrue(got.samefile(expected)) + except Exception as e: + # attach a note, so it's shown even if `samefile` fails + e.add_note(f'{expected=}, {got=}') + raise elif type == tarfile.REGTYPE or type is None: self.assertTrue(path.is_file()) elif type == tarfile.DIRTYPE: self.assertTrue(path.is_dir()) elif type == tarfile.FIFOTYPE: self.assertTrue(path.is_fifo()) + elif type == tarfile.SYMTYPE: + self.assertTrue(path.is_symlink()) else: raise NotImplementedError(type) if size is not None: self.assertEqual(path.stat().st_size, size) + if content is not None: + self.assertEqual(path.read_text(), content) for parent in path.parents: self.expected_paths.discard(parent) + def expect_any_tree(self, name): + """Check a directory; forget about its contents.""" + tree_path = (self.destdir / name).resolve() + self.expect_file(tree_path, type=tarfile.DIRTYPE) + self.expected_paths = { + p for p in self.expected_paths + if tree_path not in p.parents + } + def expect_exception(self, exc_type, message_re='.'): with self.assertRaisesRegex(exc_type, message_re): if self.raised_exception is not None: raise self.raised_exception - self.raised_exception = None + self.reraise_exception = False + return self.raised_exception def test_benign_file(self): with ArchiveMaker() as arc: @@ -3709,6 +3786,80 @@ def test_parent_symlink(self): with self.check_context(arc.open(), 'data'): self.expect_file('parent/evil') + @symlink_test + @os_helper.skip_unless_symlink + def test_realpath_limit_attack(self): + # (CVE-2025-4517) + + with ArchiveMaker() as arc: + # populate the symlinks and dirs that expand in os.path.realpath() + # The component length is chosen so that in common cases, the unexpanded + # path fits in PATH_MAX, but it overflows when the final symlink + # is expanded + steps = "abcdefghijklmnop" + if sys.platform == 'win32': + component = 'd' * 25 + elif 'PC_PATH_MAX' in os.pathconf_names: + max_path_len = os.pathconf(self.outerdir.parent, "PC_PATH_MAX") + path_sep_len = 1 + dest_len = len(str(self.destdir)) + path_sep_len + component_len = (max_path_len - dest_len) // (len(steps) + path_sep_len) + component = 'd' * component_len + else: + raise NotImplementedError("Need to guess component length for {sys.platform}") + path = "" + step_path = "" + for i in steps: + arc.add(os.path.join(path, component), type=tarfile.DIRTYPE, + mode='drwxrwxrwx') + arc.add(os.path.join(path, i), symlink_to=component) + path = os.path.join(path, component) + step_path = os.path.join(step_path, i) + # create the final symlink that exceeds PATH_MAX and simply points + # to the top dir. + # this link will never be expanded by + # os.path.realpath(strict=False), nor anything after it. + linkpath = os.path.join(*steps, "l"*254) + parent_segments = [".."] * len(steps) + arc.add(linkpath, symlink_to=os.path.join(*parent_segments)) + # make a symlink outside to keep the tar command happy + arc.add("escape", symlink_to=os.path.join(linkpath, "..")) + # use the symlinks above, that are not checked, to create a hardlink + # to a file outside of the destination path + arc.add("flaglink", hardlink_to=os.path.join("escape", "flag")) + # now that we have the hardlink we can overwrite the file + arc.add("flaglink", content='overwrite') + # we can also create new files as well! + arc.add("escape/newfile", content='new') + + with (self.subTest('fully_trusted'), + self.check_context(arc.open(), filter='fully_trusted', + check_flag=False)): + if sys.platform == 'win32': + self.expect_exception((FileNotFoundError, FileExistsError)) + elif self.raised_exception: + # Cannot symlink/hardlink: tarfile falls back to getmember() + self.expect_exception(KeyError) + # Otherwise, this block should never enter. + else: + self.expect_any_tree(component) + self.expect_file('flaglink', content='overwrite') + self.expect_file('../newfile', content='new') + self.expect_file('escape', type=tarfile.SYMTYPE) + self.expect_file('a', symlink_to=component) + + for filter in 'tar', 'data': + with self.subTest(filter), self.check_context(arc.open(), filter=filter): + exc = self.expect_exception((OSError, KeyError)) + if isinstance(exc, OSError): + if sys.platform == 'win32': + # 3: ERROR_PATH_NOT_FOUND + # 5: ERROR_ACCESS_DENIED + # 206: ERROR_FILENAME_EXCED_RANGE + self.assertIn(exc.winerror, (3, 5, 206)) + else: + self.assertEqual(exc.errno, errno.ENAMETOOLONG) + @symlink_test def test_parent_symlink2(self): # Test interplaying symlinks @@ -3931,8 +4082,8 @@ def test_chains(self): arc.add('symlink2', symlink_to=os.path.join( 'linkdir', 'hardlink2')) arc.add('targetdir/target', size=3) - arc.add('linkdir/hardlink', hardlink_to='targetdir/target') - arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink') + arc.add('linkdir/hardlink', hardlink_to=os.path.join('targetdir', 'target')) + arc.add('linkdir/hardlink2', hardlink_to=os.path.join('linkdir', 'symlink')) for filter in 'tar', 'data', 'fully_trusted': with self.check_context(arc.open(), filter): @@ -3948,6 +4099,129 @@ def test_chains(self): self.expect_file('linkdir/symlink', size=3) self.expect_file('symlink2', size=3) + @symlink_test + def test_sneaky_hardlink_fallback(self): + # (CVE-2025-4330) + # Test that when hardlink extraction falls back to extracting members + # from the archive, the extracted member is (re-)filtered. + with ArchiveMaker() as arc: + # Create a directory structure so the c/escape symlink stays + # inside the path + arc.add("a/t/dummy") + # Create b/ directory + arc.add("b/") + # Point "c" to the bottom of the tree in "a" + arc.add("c", symlink_to=os.path.join("a", "t")) + # link to non-existant location under "a" + arc.add("c/escape", symlink_to=os.path.join("..", "..", + "link_here")) + # Move "c" to point to "b" ("c/escape" no longer exists) + arc.add("c", symlink_to="b") + # Attempt to create a hard link to "c/escape". Since it doesn't + # exist it will attempt to extract "cescape" but at "boom". + arc.add("boom", hardlink_to=os.path.join("c", "escape")) + + with self.check_context(arc.open(), 'data'): + if not os_helper.can_symlink(): + # When 'c/escape' is extracted, 'c' is a regular + # directory, and 'c/escape' *would* point outside + # the destination if symlinks were allowed. + self.expect_exception( + tarfile.LinkOutsideDestinationError) + elif sys.platform == "win32": + # On Windows, 'c/escape' points outside the destination + self.expect_exception(tarfile.LinkOutsideDestinationError) + else: + e = self.expect_exception( + tarfile.LinkFallbackError, + "link 'boom' would be extracted as a copy of " + + "'c/escape', which was rejected") + self.assertIsInstance(e.__cause__, + tarfile.LinkOutsideDestinationError) + for filter in 'tar', 'fully_trusted': + with self.subTest(filter), self.check_context(arc.open(), filter): + if not os_helper.can_symlink(): + self.expect_file("a/t/dummy") + self.expect_file("b/") + self.expect_file("c/") + else: + self.expect_file("a/t/dummy") + self.expect_file("b/") + self.expect_file("a/t/escape", symlink_to='../../link_here') + self.expect_file("boom", symlink_to='../../link_here') + self.expect_file("c", symlink_to='b') + + @symlink_test + def test_exfiltration_via_symlink(self): + # (CVE-2025-4138) + # Test changing symlinks that result in a symlink pointing outside + # the extraction directory, unless prevented by 'data' filter's + # normalization. + with ArchiveMaker() as arc: + arc.add("escape", symlink_to=os.path.join('link', 'link', '..', '..', 'link-here')) + arc.add("link", symlink_to='./') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + if os_helper.can_symlink(): + self.expect_file("link", symlink_to='./') + if filter == 'data': + self.expect_file("escape", symlink_to='link-here') + else: + self.expect_file("escape", + symlink_to='link/link/../../link-here') + else: + # Nothing is extracted. + pass + + @symlink_test + def test_chmod_outside_dir(self): + # (CVE-2024-12718) + # Test that members used for delayed updates of directory metadata + # are (re-)filtered. + with ArchiveMaker() as arc: + # "pwn" is a veeeery innocent symlink: + arc.add("a/pwn", symlink_to='.') + # But now "pwn" is also a directory, so it's scheduled to have its + # metadata updated later: + arc.add("a/pwn/", mode='drwxrwxrwx') + # Oops, "pwn" is not so innocent any more: + arc.add("a/pwn", symlink_to='x/../') + # Newly created symlink points to the dest dir, + # so it's OK for the "data" filter. + arc.add('a/x', symlink_to=('../')) + # But now "pwn" points outside the dest dir + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter) as cc: + if not os_helper.can_symlink(): + self.expect_file("a/pwn/") + elif filter == 'data': + self.expect_file("a/x", symlink_to='../') + self.expect_file("a/pwn", symlink_to='.') + else: + self.expect_file("a/x", symlink_to='../') + self.expect_file("a/pwn", symlink_to='x/../') + if sys.platform != "win32": + st_mode = cc.outerdir.stat().st_mode + self.assertNotEqual(st_mode & 0o777, 0o777) + + def test_link_fallback_normalizes(self): + # Make sure hardlink fallbacks work for non-normalized paths for all + # filters + with ArchiveMaker() as arc: + arc.add("dir/") + arc.add("dir/../afile") + arc.add("link1", hardlink_to='dir/../afile') + arc.add("link2", hardlink_to='dir/../dir/../afile') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter) as cc: + self.expect_file("dir/") + self.expect_file("afile") + self.expect_file("link1") + self.expect_file("link2") + def test_modes(self): # Test how file modes are extracted # (Note that the modes are ignored on platforms without working chmod) @@ -4072,7 +4346,7 @@ def test_tar_filter(self): # The 'tar' filter returns TarInfo objects with the same name/type. # (It can also fail for particularly "evil" input, but we don't have # that in the test archive.) - with tarfile.TarFile.open(tarname) as tar: + with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: for tarinfo in tar.getmembers(): try: filtered = tarfile.tar_filter(tarinfo, '') @@ -4084,7 +4358,7 @@ def test_tar_filter(self): def test_data_filter(self): # The 'data' filter either raises, or returns TarInfo with the same # name/type. - with tarfile.TarFile.open(tarname) as tar: + with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: for tarinfo in tar.getmembers(): try: filtered = tarfile.data_filter(tarinfo, '') @@ -4242,13 +4516,13 @@ def valueerror_filter(tarinfo, path): # If errorlevel is 0, errors affected by errorlevel are ignored with self.check_context(arc.open(errorlevel=0), extracterror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), filtererror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), oserror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), tarerror_filter): self.expect_exception(tarfile.TarError) @@ -4259,7 +4533,7 @@ def valueerror_filter(tarinfo, path): # If 1, all fatal errors are raised with self.check_context(arc.open(errorlevel=1), extracterror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=1), filtererror_filter): self.expect_exception(tarfile.FilterError) diff --git a/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst new file mode 100644 index 00000000000000..08a0087e203671 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst @@ -0,0 +1,6 @@ +Fixes multiple issues that allowed ``tarfile`` extraction filters +(``filter="data"`` and ``filter="tar"``) to be bypassed using crafted +symlinks and hard links. + +Addresses :cve:`2024-12718`, :cve:`2025-4138`, :cve:`2025-4330`, and :cve:`2025-4517`. + From 54ca55978e305ebb099d1b49633211597625bd52 Mon Sep 17 00:00:00 2001 From: tpburns Date: Tue, 3 Jun 2025 12:00:25 -0400 Subject: [PATCH 467/989] gh-134248 test_getallocatedblocks pre-check to ignore immortalized strings (#134871) When sanity checking against gettotalrefcount(), we exclude the blocks for immortalized strings since their references are not tracked/reported. This now matches refleak.py's book-keeping using the same functions. --- Lib/test/test_sys.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 83745f3d0ba46e..bf415894903e9b 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1135,23 +1135,12 @@ def test_getallocatedblocks(self): b = sys.getallocatedblocks() self.assertLessEqual(b, a) try: - # While we could imagine a Python session where the number of - # multiple buffer objects would exceed the sharing of references, - # it is unlikely to happen in a normal test run. - # - # In free-threaded builds each code object owns an array of - # pointers to copies of the bytecode. When the number of - # code objects is a large fraction of the total number of - # references, this can cause the total number of allocated - # blocks to exceed the total number of references. - # - # For some reason, iOS seems to trigger the "unlikely to happen" - # case reliably under CI conditions. It's not clear why; but as - # this test is checking the behavior of getallocatedblock() - # under garbage collection, we can skip this pre-condition check - # for now. See GH-130384. - if not support.Py_GIL_DISABLED and not support.is_apple_mobile: - self.assertLess(a, sys.gettotalrefcount()) + # The reported blocks will include immortalized strings, but the + # total ref count will not. This will sanity check that among all + # other objects (those eligible for garbage collection) there + # are more references being tracked than allocated blocks. + interned_immortal = sys.getunicodeinternedsize(_only_immortal=True) + self.assertLess(a - interned_immortal, sys.gettotalrefcount()) except AttributeError: # gettotalrefcount() not available pass From 1ffe913c2017b44804aca18befd45689df06c069 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 4 Jun 2025 05:28:58 +1200 Subject: [PATCH 468/989] gh-127081: use `getlogin_r` if available (gh-132751) The `getlogin` function is not thread-safe: replace with `getlogin_r` where available. --- ...-04-21-01-03-15.gh-issue-127081.WXRliX.rst | 2 ++ Modules/posixmodule.c | 18 ++++++++++ configure | 33 +++++++++++++++++++ configure.ac | 14 +++++++- pyconfig.h.in | 13 ++++++++ 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-21-01-03-15.gh-issue-127081.WXRliX.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-21-01-03-15.gh-issue-127081.WXRliX.rst b/Misc/NEWS.d/next/Library/2025-04-21-01-03-15.gh-issue-127081.WXRliX.rst new file mode 100644 index 00000000000000..63fed60ced03c5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-21-01-03-15.gh-issue-127081.WXRliX.rst @@ -0,0 +1,2 @@ +Fix libc thread safety issues with :mod:`os` by replacing ``getlogin`` with +``getlogin_r`` re-entrant version. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 588894adeac811..07ab5499cd4fa1 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -9562,6 +9562,24 @@ os_getlogin_impl(PyObject *module) } else result = PyErr_SetFromWindowsErr(GetLastError()); +#elif defined (HAVE_GETLOGIN_R) +# if defined (HAVE_MAXLOGNAME) + char name[MAXLOGNAME + 1]; +# elif defined (HAVE_UT_NAMESIZE) + char name[UT_NAMESIZE + 1]; +# else + char name[256]; +# endif + int err = getlogin_r(name, sizeof(name)); + if (err) { + int old_errno = errno; + errno = -err; + posix_error(); + errno = old_errno; + } + else { + result = PyUnicode_DecodeFSDefault(name); + } #else char *name; int old_errno = errno; diff --git a/configure b/configure index b0dc18d5cea749..029bf527da4e3d 100755 --- a/configure +++ b/configure @@ -19458,6 +19458,12 @@ if test "x$ac_cv_func_getlogin" = xyes then : printf "%s\n" "#define HAVE_GETLOGIN 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "getlogin_r" "ac_cv_func_getlogin_r" +if test "x$ac_cv_func_getlogin_r" = xyes +then : + printf "%s\n" "#define HAVE_GETLOGIN_R 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "getpeername" "ac_cv_func_getpeername" if test "x$ac_cv_func_getpeername" = xyes @@ -23843,6 +23849,33 @@ fi +ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_MAXLOGNAME" = xyes +then : + +printf "%s\n" "#define HAVE_MAXLOGNAME 1" >>confdefs.h + +fi + +ac_fn_check_decl "$LINENO" "UT_NAMESIZE" "ac_cv_have_decl_UT_NAMESIZE" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_UT_NAMESIZE" = xyes +then : + ac_have_decl=1 +else case e in #( + e) ac_have_decl=0 ;; +esac +fi +printf "%s\n" "#define HAVE_DECL_UT_NAMESIZE $ac_have_decl" >>confdefs.h +if test $ac_have_decl = 1 +then : + +printf "%s\n" "#define HAVE_UT_NAMESIZE 1" >>confdefs.h + +fi + + # check for openpty, login_tty, and forkpty diff --git a/configure.ac b/configure.ac index 70ad6da86719c6..371b2e8ed73525 100644 --- a/configure.ac +++ b/configure.ac @@ -5219,7 +5219,7 @@ AC_CHECK_FUNCS([ \ faccessat fchmod fchmodat fchown fchownat fdopendir fdwalk fexecve \ fork fork1 fpathconf fstatat ftime ftruncate futimens futimes futimesat \ gai_strerror getegid geteuid getgid getgrent getgrgid getgrgid_r \ - getgrnam_r getgrouplist gethostname getitimer getloadavg getlogin \ + getgrnam_r getgrouplist gethostname getitimer getloadavg getlogin getlogin_r \ getpeername getpgid getpid getppid getpriority _getpty \ getpwent getpwnam_r getpwuid getpwuid_r getresgid getresuid getrusage getsid getspent \ getspnam getuid getwd grantpt if_nameindex initgroups kill killpg lchown linkat \ @@ -5538,6 +5538,18 @@ PY_CHECK_FUNC([setgroups], [ #endif ]) +AC_CHECK_DECL([MAXLOGNAME], + [AC_DEFINE([HAVE_MAXLOGNAME], [1], + [Define if you have the 'MAXLOGNAME' constant.])], + [], + [@%:@include ]) + +AC_CHECK_DECLS([UT_NAMESIZE], + [AC_DEFINE([HAVE_UT_NAMESIZE], [1], + [Define if you have the 'HAVE_UT_NAMESIZE' constant.])], + [], + [@%:@include ]) + # check for openpty, login_tty, and forkpty AC_CHECK_FUNCS([openpty], [], diff --git a/pyconfig.h.in b/pyconfig.h.in index 3dbbda157df70e..65a2c55217c258 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -267,6 +267,10 @@ */ #undef HAVE_DECL_TZNAME +/* Define to 1 if you have the declaration of 'UT_NAMESIZE', and to 0 if you + don't. */ +#undef HAVE_DECL_UT_NAMESIZE + /* Define to 1 if you have the device macros. */ #undef HAVE_DEVICE_MACROS @@ -539,6 +543,9 @@ /* Define to 1 if you have the 'getlogin' function. */ #undef HAVE_GETLOGIN +/* Define to 1 if you have the 'getlogin_r' function. */ +#undef HAVE_GETLOGIN_R + /* Define to 1 if you have the 'getnameinfo' function. */ #undef HAVE_GETNAMEINFO @@ -807,6 +814,9 @@ /* Define this if you have the makedev macro. */ #undef HAVE_MAKEDEV +/* Define if you have the 'MAXLOGNAME' constant. */ +#undef HAVE_MAXLOGNAME + /* Define to 1 if you have the 'mbrtowc' function. */ #undef HAVE_MBRTOWC @@ -1575,6 +1585,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UTMP_H +/* Define if you have the 'HAVE_UT_NAMESIZE' constant. */ +#undef HAVE_UT_NAMESIZE + /* Define to 1 if you have the 'uuid_create' function. */ #undef HAVE_UUID_CREATE From dba9de731b231ca0c079205f496d1e3d178b4fd3 Mon Sep 17 00:00:00 2001 From: Joe Rickerby Date: Wed, 4 Jun 2025 01:31:43 +0100 Subject: [PATCH 469/989] gh-135101: When choosing the default simulator device, don't use `simctl --set testing` (#135102) On a fresh Xcode install (including some CI provider configurations), there is no pre-existing testing set that can be used to identify simulator models. Use the default device set to detect available models instead. Live testing simulators are still created in the testing set. --- iOS/testbed/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iOS/testbed/__main__.py b/iOS/testbed/__main__.py index c05497ede3aa61..1146bf3b988cda 100644 --- a/iOS/testbed/__main__.py +++ b/iOS/testbed/__main__.py @@ -127,7 +127,7 @@ async def async_check_output(*args, **kwargs): async def select_simulator_device(): # List the testing simulators, in JSON format raw_json = await async_check_output( - "xcrun", "simctl", "--set", "testing", "list", "-j" + "xcrun", "simctl", "list", "-j" ) json_data = json.loads(raw_json) From 8f778f7bb9a8ad80fc06570566ad4de541826178 Mon Sep 17 00:00:00 2001 From: Christian Veenhuis <124370897+ChVeen@users.noreply.github.com> Date: Wed, 4 Jun 2025 06:57:31 +0200 Subject: [PATCH 470/989] gh-135103: Remove an unused local variable in Lib/code.py (GH-135104) remove unused local variable --- Lib/code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/code.py b/Lib/code.py index b134886dc267fb..f7e275d8801b7c 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -224,7 +224,7 @@ def interact(self, banner=None, exitmsg=None): sys.ps1 = ">>> " delete_ps1_after = True try: - _ps2 = sys.ps2 + sys.ps2 delete_ps2_after = False except AttributeError: sys.ps2 = "... " From cc581f32bf5f15e9f2f89b830ec64ea25684d0cd Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Jun 2025 03:35:56 -0400 Subject: [PATCH 471/989] gh-135099: Only wait on `_PyOS_SigintEvent()` in main thread (GH-135100) On Windows, the `_PyOS_SigintEvent()` event handle is used to interrupt the main thread when Ctrl-C is pressed. Previously, we also waited on the event from other threads, but ignored the result. However, this can race with interpreter shutdown because the main thread closes the handle in `_PySignal_Fini` and threads may still be running and using mutexes during interpreter shtudown. Only use `_PyOS_SigintEvent()` in the main thread in parking_lot.c, like we do in other places in the CPython codebase. --- ...-06-03-18-26-54.gh-issue-135099.Q9usKm.rst | 2 ++ Python/parking_lot.c | 22 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2025-06-03-18-26-54.gh-issue-135099.Q9usKm.rst diff --git a/Misc/NEWS.d/next/Windows/2025-06-03-18-26-54.gh-issue-135099.Q9usKm.rst b/Misc/NEWS.d/next/Windows/2025-06-03-18-26-54.gh-issue-135099.Q9usKm.rst new file mode 100644 index 00000000000000..36e70b1c0d8cb6 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2025-06-03-18-26-54.gh-issue-135099.Q9usKm.rst @@ -0,0 +1,2 @@ +Fix a crash that could occur on Windows when a background thread waits on a +:c:type:`PyMutex` while the main thread is shutting down the interpreter. diff --git a/Python/parking_lot.c b/Python/parking_lot.c index 8edf43235942ab..e896dea02712f2 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -112,17 +112,27 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) } } - // NOTE: we wait on the sigint event even in non-main threads to match the - // behavior of the other platforms. Non-main threads will ignore the - // Py_PARK_INTR result. - HANDLE sigint_event = _PyOS_SigintEvent(); - HANDLE handles[2] = { sema->platform_sem, sigint_event }; - DWORD count = sigint_event != NULL ? 2 : 1; + HANDLE handles[2] = { sema->platform_sem, NULL }; + HANDLE sigint_event = NULL; + DWORD count = 1; + if (_Py_IsMainThread()) { + // gh-135099: Wait on the SIGINT event only in the main thread. Other + // threads would ignore the result anyways, and accessing + // `_PyOS_SigintEvent()` from non-main threads may race with + // interpreter shutdown, which closes the event handle. Note that + // non-main interpreters will ignore the result. + sigint_event = _PyOS_SigintEvent(); + if (sigint_event != NULL) { + handles[1] = sigint_event; + count = 2; + } + } wait = WaitForMultipleObjects(count, handles, FALSE, millis); if (wait == WAIT_OBJECT_0) { res = Py_PARK_OK; } else if (wait == WAIT_OBJECT_0 + 1) { + assert(sigint_event != NULL); ResetEvent(sigint_event); res = Py_PARK_INTR; } From 1f515104441898111c20aca5a7bbda1d11b15d36 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 4 Jun 2025 10:23:08 +0100 Subject: [PATCH 472/989] Use a more clear example for the PEP 758 what's new section (#135118) --- Doc/whatsnew/3.14.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 27dfc75c90fbe9..45e68aea5fb9a2 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -342,15 +342,16 @@ For example the following expressions are now valid: .. code-block:: python try: - release_new_sleep_token_album() - except AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") + connect_to_server() + except TimeoutError, ConnectionRefusedError: + print("Network issue encountered.") # The same applies to except* (for exception groups): + try: - release_new_sleep_token_album() - except* AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") + connect_to_server() + except* TimeoutError, ConnectionRefusedError: + print("Network issue encountered.") Check :pep:`758` for more details. From 5b3865418ceb1448bfbf15cddf52c900cd5882a3 Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Wed, 4 Jun 2025 15:51:18 +0300 Subject: [PATCH 473/989] gh-135108: Fix utmp.h inclusion in posixmodule.c on NetBSD (GH-135109) --- Modules/posixmodule.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 07ab5499cd4fa1..7dc5ef39a566e4 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -8820,14 +8820,14 @@ os_ptsname_impl(PyObject *module, int fd) #if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) || defined(HAVE_LOGIN_TTY) || defined(HAVE_DEV_PTMX) #ifdef HAVE_PTY_H #include -#ifdef HAVE_UTMP_H -#include -#endif /* HAVE_UTMP_H */ #elif defined(HAVE_LIBUTIL_H) #include #elif defined(HAVE_UTIL_H) #include #endif /* HAVE_PTY_H */ +#ifdef HAVE_UTMP_H +#include +#endif /* HAVE_UTMP_H */ #ifdef HAVE_STROPTS_H #include #endif From bc00ce941e03347dade3faa8822f19836b5bbfe4 Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Wed, 4 Jun 2025 13:58:34 +0100 Subject: [PATCH 474/989] gh-135074: Fix exception messages in test.support module (GH-135076) --- Lib/test/support/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 351d832a26d1df..f9b66b88d3d05c 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1084,7 +1084,7 @@ def set_memlimit(limit: str) -> None: global real_max_memuse memlimit = _parse_memlimit(limit) if memlimit < _2G - 1: - raise ValueError('Memory limit {limit!r} too low to be useful') + raise ValueError(f'Memory limit {limit!r} too low to be useful') real_max_memuse = memlimit memlimit = min(memlimit, MAX_Py_ssize_t) @@ -2358,7 +2358,7 @@ def infinite_recursion(max_depth=None): # very deep recursion. max_depth = 20_000 elif max_depth < 3: - raise ValueError("max_depth must be at least 3, got {max_depth}") + raise ValueError(f"max_depth must be at least 3, got {max_depth}") depth = get_recursion_depth() depth = max(depth - 1, 1) # Ignore infinite_recursion() frame. limit = depth + max_depth From 40c8be0008ecadb5d0dc9a017434b1133a3a6e06 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 4 Jun 2025 14:00:25 +0100 Subject: [PATCH 475/989] gh-126483: disable warnings filters mutation in concurrent test (GH-132694) The `test_ssl_in_multiple_threads` test failed because `test_check_hostname_idn()` modified the global warnings filters via `warnings_helper.check_no_resource_warning()`. Only check for warnings when the context aware warnings feature is enabled, which makes the warnings filter context-local and thread-safe. --- Lib/test/test_ssl.py | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 2767a53d53c1fc..f123f6ece40669 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -31,6 +31,7 @@ import platform import sysconfig import functools +from contextlib import nullcontext try: import ctypes except ImportError: @@ -2843,6 +2844,7 @@ def test_ssl_in_multiple_threads(self): # See GH-124984: OpenSSL is not thread safe. threads = [] + warnings_filters = sys.flags.context_aware_warnings global USE_SAME_TEST_CONTEXT USE_SAME_TEST_CONTEXT = True try: @@ -2851,7 +2853,10 @@ def test_ssl_in_multiple_threads(self): self.test_alpn_protocols, self.test_getpeercert, self.test_crl_check, - self.test_check_hostname_idn, + functools.partial( + self.test_check_hostname_idn, + warnings_filters=warnings_filters, + ), self.test_wrong_cert_tls12, self.test_wrong_cert_tls13, ): @@ -3097,7 +3102,7 @@ def test_dual_rsa_ecc(self): cipher = s.cipher()[0].split('-') self.assertTrue(cipher[:2], ('ECDHE', 'ECDSA')) - def test_check_hostname_idn(self): + def test_check_hostname_idn(self, warnings_filters=True): if support.verbose: sys.stdout.write("\n") @@ -3152,16 +3157,30 @@ def test_check_hostname_idn(self): server_hostname="python.example.org") as s: with self.assertRaises(ssl.CertificateError): s.connect((HOST, server.port)) - with ThreadedEchoServer(context=server_context, chatty=True) as server: - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(UnicodeError): - context.wrap_socket(socket.socket(), - server_hostname='.pythontest.net') - with ThreadedEchoServer(context=server_context, chatty=True) as server: - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(UnicodeDecodeError): - context.wrap_socket(socket.socket(), - server_hostname=b'k\xf6nig.idn.pythontest.net') + with ( + ThreadedEchoServer(context=server_context, chatty=True) as server, + ( + warnings_helper.check_no_resource_warning(self) + if warnings_filters + else nullcontext() + ), + self.assertRaises(UnicodeError), + ): + context.wrap_socket(socket.socket(), server_hostname='.pythontest.net') + + with ( + ThreadedEchoServer(context=server_context, chatty=True) as server, + ( + warnings_helper.check_no_resource_warning(self) + if warnings_filters + else nullcontext() + ), + self.assertRaises(UnicodeDecodeError), + ): + context.wrap_socket( + socket.socket(), + server_hostname=b'k\xf6nig.idn.pythontest.net', + ) def test_wrong_cert_tls12(self): """Connecting when the server rejects the client's certificate From c21113072cd1f0da83729f99d3576647db85d816 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 4 Jun 2025 15:07:52 +0200 Subject: [PATCH 476/989] gh-134989: Implement PyObject_DelAttr() as a macro in the limited C API (GH-135021) --- Include/abstract.h | 14 ++++++++++++-- .../2025-06-02-13-19-22.gh-issue-134989.sDDyBN.rst | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-06-02-13-19-22.gh-issue-134989.sDDyBN.rst diff --git a/Include/abstract.h b/Include/abstract.h index b9199fc03a399a..80f3298701d249 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -138,7 +138,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. - This is the equivalent of the Python statement: del o.attr_name. */ + This is the equivalent of the Python statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttrString(O, A) PyObject_SetAttrString((O), (A), NULL) +#endif /* Implemented elsewhere: @@ -147,7 +152,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. This is the equivalent of the Python - statement: del o.attr_name. */ + statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttr(O, A) PyObject_SetAttr((O), (A), NULL) +#endif /* Implemented elsewhere: diff --git a/Misc/NEWS.d/next/C_API/2025-06-02-13-19-22.gh-issue-134989.sDDyBN.rst b/Misc/NEWS.d/next/C_API/2025-06-02-13-19-22.gh-issue-134989.sDDyBN.rst new file mode 100644 index 00000000000000..e49f765106582e --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-06-02-13-19-22.gh-issue-134989.sDDyBN.rst @@ -0,0 +1,2 @@ +Implement :c:func:`PyObject_DelAttr` and :c:func:`PyObject_DelAttrString` as +macros in the limited C API 3.12 and older. Patch by Victor Stinner. From 0df15d0d4d55dcf7b7a7f4b2985bf84fd092d257 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Wed, 4 Jun 2025 11:26:19 -0400 Subject: [PATCH 477/989] gh-129876: Update IDLE News3.txt to May 2025 (#135139) --- Lib/idlelib/News3.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index 74d84b3893125a..30784578cc637f 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -4,6 +4,13 @@ Released on 2025-10-07 ========================= +gh-112936: IDLE - Include Shell menu in single-process mode, +though with Restart Shell and View Last Restart disabled. +Patch by Zhikang Yan. + +gh-112938: IDLE - Fix uninteruptable hang when Shell gets +rapid continuous output. + gh-127060: Set TERM environment variable to 'dumb' to not add ANSI escape sequences for text color in tracebacks. IDLE does not understand them. Patch by Victor Stinner. From a10b321a5807ba924c7a7833692fe5d0dc40e875 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Wed, 4 Jun 2025 10:06:10 -0700 Subject: [PATCH 478/989] Update the devcontainer image to the latest version (GH-135143) Fixes an issue where the WASI tools were being incorrectly installed under the ARM image. --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index fd1d7151518f40..8e09808f08bba2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "image": "ghcr.io/python/devcontainer:2025.05.25.15232270922", + "image": "ghcr.io/python/devcontainer:2025.05.29.15334414373", "onCreateCommand": [ // Install common tooling. "dnf", From e598eecf4c97509acef517e94053e45db51636fb Mon Sep 17 00:00:00 2001 From: "T. Wouters" Date: Wed, 4 Jun 2025 21:39:00 +0200 Subject: [PATCH 479/989] gh-135144: Add `_remote_debugging` to the MSI (legacy) Windows installers. (#135145) Add _remote_debugging to the MSI (legacy) installers. --- Tools/msi/freethreaded/freethreaded_files.wxs | 2 +- Tools/msi/lib/lib_files.wxs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index 86d9a8b83f6535..0707e77b5e9ab2 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -103,7 +103,7 @@ - + diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index 8439518bcbd804..4d44299f783909 100644 --- a/Tools/msi/lib/lib_files.wxs +++ b/Tools/msi/lib/lib_files.wxs @@ -1,6 +1,6 @@  - + From 6b77af257c25d31f1f137e477cb23e63692ddf29 Mon Sep 17 00:00:00 2001 From: mpage Date: Wed, 4 Jun 2025 16:07:58 -0700 Subject: [PATCH 480/989] gh-134889: Fix handling of a few opcodes when optimizing `LOAD_FAST` (#134958) We were incorrectly handling a few opcodes that leave their operands on the stack. Treat all of these conservatively; assume that they always leave operands on the stack. --- Include/internal/pycore_magic_number.h | 3 +- Lib/test/test_dis.py | 2 +- Lib/test/test_peepholer.py | 92 +++++++++++++++++++ ...-05-30-18-09-54.gh-issue-134889.Ic9UM-.rst | 2 + Python/flowgraph.c | 20 ++++ 5 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-18-09-54.gh-issue-134889.Ic9UM-.rst diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index cd1fc873623ed1..347d9762f26bff 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -280,6 +280,7 @@ Known values: Python 3.15a0 3650 (Initial version) Python 3.15a1 3651 (Simplify LOAD_CONST) Python 3.15a1 3652 (Virtual iterators) + Python 3.15a1 3653 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST) Python 3.16 will start with 3700 @@ -293,7 +294,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3652 +#define PYC_MAGIC_NUMBER 3653 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index ec930a728aa5b3..355990ed58ee09 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -606,7 +606,7 @@ async def _asyncwith(c): POP_TOP L1: RESUME 0 -%4d LOAD_FAST_BORROW 0 (c) +%4d LOAD_FAST 0 (c) COPY 1 LOAD_SPECIAL 3 (__aexit__) SWAP 2 diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index f33de3d420ca34..ef596630b930f7 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2614,6 +2614,90 @@ def test_send(self): ] self.cfg_optimization_test(insts, expected, consts=[None]) + def test_format_simple(self): + # FORMAT_SIMPLE will leave its operand on the stack if it's a unicode + # object. We treat it conservatively and assume that it always leaves + # its operand on the stack. + insts = [ + ("LOAD_FAST", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("STORE_FAST", 1, 3), + ] + self.check(insts, insts) + + insts = [ + ("LOAD_FAST", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("POP_TOP", None, 3), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("POP_TOP", None, 3), + ] + self.check(insts, expected) + + def test_set_function_attribute(self): + # SET_FUNCTION_ATTRIBUTE leaves the function on the stack + insts = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("STORE_FAST", 1, 4), + ("LOAD_CONST", 0, 5), + ("RETURN_VALUE", None, 6) + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + insts = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("RETURN_VALUE", None, 4) + ] + expected = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST_BORROW", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("RETURN_VALUE", None, 4) + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + + def test_get_yield_from_iter(self): + # GET_YIELD_FROM_ITER may leave its operand on the stack + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_YIELD_FROM_ITER", None, 2), + ("LOAD_CONST", 0, 3), + send := self.Label(), + ("SEND", end := self.Label(), 5), + ("YIELD_VALUE", 1, 6), + ("RESUME", 2, 7), + ("JUMP", send, 8), + end, + ("END_SEND", None, 9), + ("LOAD_CONST", 0, 10), + ("RETURN_VALUE", None, 11), + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + def test_push_exc_info(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("PUSH_EXC_INFO", None, 2), + ] + self.check(insts, insts) + + def test_load_special(self): + # LOAD_SPECIAL may leave self on the stack + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_SPECIAL", 0, 2), + ("STORE_FAST", 1, 3), + ] + self.check(insts, insts) + + def test_del_in_finally(self): # This loads `obj` onto the stack, executes `del obj`, then returns the # `obj` from the stack. See gh-133371 for more details. @@ -2630,6 +2714,14 @@ def create_obj(): gc.collect() self.assertEqual(obj, [42]) + def test_format_simple_unicode(self): + # Repro from gh-134889 + def f(): + var = f"{1}" + var = f"{var}" + return var + self.assertEqual(f(), "1") + if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-18-09-54.gh-issue-134889.Ic9UM-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-18-09-54.gh-issue-134889.Ic9UM-.rst new file mode 100644 index 00000000000000..3b86134bf16800 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-18-09-54.gh-issue-134889.Ic9UM-.rst @@ -0,0 +1,2 @@ +Fix handling of a few opcodes that leave operands on the stack when +optimizing ``LOAD_FAST``. diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 67ccf350b72ed6..2adc8c84d83974 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2870,9 +2870,11 @@ optimize_load_fast(cfg_builder *g) // how many inputs should be left on the stack. // Opcodes that consume no inputs + case FORMAT_SIMPLE: case GET_ANEXT: case GET_ITER: case GET_LEN: + case GET_YIELD_FROM_ITER: case IMPORT_FROM: case MATCH_KEYS: case MATCH_MAPPING: @@ -2907,6 +2909,16 @@ optimize_load_fast(cfg_builder *g) break; } + case END_SEND: + case SET_FUNCTION_ATTRIBUTE: { + assert(_PyOpcode_num_popped(opcode, oparg) == 2); + assert(_PyOpcode_num_pushed(opcode, oparg) == 1); + ref tos = ref_stack_pop(&refs); + ref_stack_pop(&refs); + PUSH_REF(tos.instr, tos.local); + break; + } + // Opcodes that consume some inputs and push new values case CHECK_EXC_MATCH: { ref_stack_pop(&refs); @@ -2936,6 +2948,14 @@ optimize_load_fast(cfg_builder *g) break; } + case LOAD_SPECIAL: + case PUSH_EXC_INFO: { + ref tos = ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + PUSH_REF(tos.instr, tos.local); + break; + } + case SEND: { load_fast_push_block(&sp, instr->i_target, refs.size); ref_stack_pop(&refs); From 2e1544fd2b0cd46ba93fc51e3cdd47f4781d7499 Mon Sep 17 00:00:00 2001 From: Malcolm Smith Date: Thu, 5 Jun 2025 06:46:16 +0100 Subject: [PATCH 481/989] gh-131531: android.py enhancements to support cibuildwheel (#132870) Modifies the environment handling and execution arguments of the Android management script to support the compilation of third-party binaries, and the use of the testbed to invoke third-party test code. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Russell Keith-Magee --- Android/README.md | 4 + Android/android-env.sh | 6 +- Android/android.py | 231 ++++++++++++------ Android/testbed/app/build.gradle.kts | 20 +- .../java/org/python/testbed/PythonSuite.kt | 8 +- .../java/org/python/testbed/MainActivity.kt | 27 +- .../{main.py => android_testbed_main.py} | 20 +- Android/testbed/build.gradle.kts | 2 +- .../gradle/wrapper/gradle-wrapper.properties | 2 +- Doc/using/android.rst | 9 + 10 files changed, 239 insertions(+), 90 deletions(-) rename Android/testbed/app/src/main/python/{main.py => android_testbed_main.py} (68%) diff --git a/Android/README.md b/Android/README.md index 6cabd6ba5d6844..c42eb627006e6a 100644 --- a/Android/README.md +++ b/Android/README.md @@ -156,6 +156,10 @@ repository's `Lib` directory will be picked up immediately. Changes in C files, and architecture-specific files such as sysconfigdata, will not take effect until you re-run `android.py make-host` or `build`. +The testbed app can also be used to test third-party packages. For more details, +run `android.py test --help`, paying attention to the options `--site-packages`, +`--cwd`, `-c` and `-m`. + ## Using in your own app diff --git a/Android/android-env.sh b/Android/android-env.sh index bab4130c9e92d0..7b381a013cf0ba 100644 --- a/Android/android-env.sh +++ b/Android/android-env.sh @@ -3,7 +3,7 @@ : "${HOST:?}" # GNU target triplet # You may also override the following: -: "${api_level:=24}" # Minimum Android API level the build will run on +: "${ANDROID_API_LEVEL:=24}" # Minimum Android API level the build will run on : "${PREFIX:-}" # Path in which to find required libraries @@ -24,7 +24,7 @@ fail() { # * https://android.googlesource.com/platform/ndk/+/ndk-rXX-release/docs/BuildSystemMaintainers.md # where XX is the NDK version. Do a diff against the version you're upgrading from, e.g.: # https://android.googlesource.com/platform/ndk/+/ndk-r25-release..ndk-r26-release/docs/BuildSystemMaintainers.md -ndk_version=27.1.12297006 +ndk_version=27.2.12479018 ndk=$ANDROID_HOME/ndk/$ndk_version if ! [ -e "$ndk" ]; then @@ -43,7 +43,7 @@ fi toolchain=$(echo "$ndk"/toolchains/llvm/prebuilt/*) export AR="$toolchain/bin/llvm-ar" export AS="$toolchain/bin/llvm-as" -export CC="$toolchain/bin/${clang_triplet}${api_level}-clang" +export CC="$toolchain/bin/${clang_triplet}${ANDROID_API_LEVEL}-clang" export CXX="${CC}++" export LD="$toolchain/bin/ld" export NM="$toolchain/bin/llvm-nm" diff --git a/Android/android.py b/Android/android.py index 3f48b42aa17571..551168fc4b2f5a 100755 --- a/Android/android.py +++ b/Android/android.py @@ -14,7 +14,7 @@ from contextlib import asynccontextmanager from datetime import datetime, timezone from glob import glob -from os.path import basename, relpath +from os.path import abspath, basename, relpath from pathlib import Path from subprocess import CalledProcessError from tempfile import TemporaryDirectory @@ -22,9 +22,13 @@ SCRIPT_NAME = Path(__file__).name ANDROID_DIR = Path(__file__).resolve().parent -CHECKOUT = ANDROID_DIR.parent +PYTHON_DIR = ANDROID_DIR.parent +in_source_tree = ( + ANDROID_DIR.name == "Android" and (PYTHON_DIR / "pyconfig.h.in").exists() +) + TESTBED_DIR = ANDROID_DIR / "testbed" -CROSS_BUILD_DIR = CHECKOUT / "cross-build" +CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" HOSTS = ["aarch64-linux-android", "x86_64-linux-android"] APP_ID = "org.python.testbed" @@ -76,39 +80,68 @@ def run(command, *, host=None, env=None, log=True, **kwargs): kwargs.setdefault("check", True) if env is None: env = os.environ.copy() - original_env = env.copy() if host: - env_script = ANDROID_DIR / "android-env.sh" - env_output = subprocess.run( - f"set -eu; " - f"HOST={host}; " - f"PREFIX={subdir(host)}/prefix; " - f". {env_script}; " - f"export", - check=True, shell=True, text=True, stdout=subprocess.PIPE - ).stdout - - for line in env_output.splitlines(): - # We don't require every line to match, as there may be some other - # output from installing the NDK. - if match := re.search( - "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line - ): - key, value = match[2], match[3] - if env.get(key) != value: - print(line) - env[key] = value - - if env == original_env: - raise ValueError(f"Found no variables in {env_script.name} output:\n" - + env_output) + host_env = android_env(host) + print_env(host_env) + env.update(host_env) if log: - print(">", " ".join(map(str, command))) + print(">", join_command(command)) return subprocess.run(command, env=env, **kwargs) +# Format a command so it can be copied into a shell. Like shlex.join, but also +# accepts arguments which are Paths, or a single string/Path outside of a list. +def join_command(args): + if isinstance(args, (str, Path)): + return str(args) + else: + return shlex.join(map(str, args)) + + +# Format the environment so it can be pasted into a shell. +def print_env(env): + for key, value in sorted(env.items()): + print(f"export {key}={shlex.quote(value)}") + + +def android_env(host): + if host: + prefix = subdir(host) / "prefix" + else: + prefix = ANDROID_DIR / "prefix" + sysconfig_files = prefix.glob("lib/python*/_sysconfigdata__android_*.py") + sysconfig_filename = next(sysconfig_files).name + host = re.fullmatch(r"_sysconfigdata__android_(.+).py", sysconfig_filename)[1] + + env_script = ANDROID_DIR / "android-env.sh" + env_output = subprocess.run( + f"set -eu; " + f"export HOST={host}; " + f"PREFIX={prefix}; " + f". {env_script}; " + f"export", + check=True, shell=True, capture_output=True, encoding='utf-8', + ).stdout + + env = {} + for line in env_output.splitlines(): + # We don't require every line to match, as there may be some other + # output from installing the NDK. + if match := re.search( + "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line + ): + key, value = match[2], match[3] + if os.environ.get(key) != value: + env[key] = value + + if not env: + raise ValueError(f"Found no variables in {env_script.name} output:\n" + + env_output) + return env + + def build_python_path(): """The path to the build Python binary.""" build_dir = subdir("build") @@ -127,7 +160,7 @@ def configure_build_python(context): clean("build") os.chdir(subdir("build", create=True)) - command = [relpath(CHECKOUT / "configure")] + command = [relpath(PYTHON_DIR / "configure")] if context.args: command.extend(context.args) run(command) @@ -139,12 +172,13 @@ def make_build_python(context): def unpack_deps(host, prefix_dir): + os.chdir(prefix_dir) deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" - for name_ver in ["bzip2-1.0.8-2", "libffi-3.4.4-3", "openssl-3.0.15-4", + for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.15-4", "sqlite-3.49.1-0", "xz-5.4.6-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") - shutil.unpack_archive(filename, prefix_dir) + shutil.unpack_archive(filename) os.remove(filename) @@ -167,7 +201,7 @@ def configure_host_python(context): os.chdir(host_dir) command = [ # Basic cross-compiling configuration - relpath(CHECKOUT / "configure"), + relpath(PYTHON_DIR / "configure"), f"--host={context.host}", f"--build={sysconfig.get_config_var('BUILD_GNU_TYPE')}", f"--with-build-python={build_python_path()}", @@ -196,9 +230,12 @@ def make_host_python(context): for pattern in ("include/python*", "lib/libpython*", "lib/python*"): delete_glob(f"{prefix_dir}/{pattern}") + # The Android environment variables were already captured in the Makefile by + # `configure`, and passing them again when running `make` may cause some + # flags to be duplicated. So we don't use the `host` argument here. os.chdir(host_dir) - run(["make", "-j", str(os.cpu_count())], host=context.host) - run(["make", "install", f"prefix={prefix_dir}"], host=context.host) + run(["make", "-j", str(os.cpu_count())]) + run(["make", "install", f"prefix={prefix_dir}"]) def build_all(context): @@ -228,7 +265,12 @@ def setup_sdk(): if not all((android_home / "licenses" / path).exists() for path in [ "android-sdk-arm-dbt-license", "android-sdk-license" ]): - run([sdkmanager, "--licenses"], text=True, input="y\n" * 100) + run( + [sdkmanager, "--licenses"], + text=True, + capture_output=True, + input="y\n" * 100, + ) # Gradle may install this automatically, but we can't rely on that because # we need to run adb within the logcat task. @@ -474,24 +516,49 @@ async def gradle_task(context): task_prefix = "connected" env["ANDROID_SERIAL"] = context.connected + hidden_output = [] + + def log(line): + # Gradle may take several minutes to install SDK packages, so it's worth + # showing those messages even in non-verbose mode. + if context.verbose or line.startswith('Preparing "Install'): + sys.stdout.write(line) + else: + hidden_output.append(line) + + if context.command: + mode = "-c" + module = context.command + else: + mode = "-m" + module = context.module or "test" + args = [ gradlew, "--console", "plain", f"{task_prefix}DebugAndroidTest", - "-Pandroid.testInstrumentationRunnerArguments.pythonArgs=" - + shlex.join(context.args), + ] + [ + # Build-time properties + f"-Ppython.{name}={value}" + for name, value in [ + ("sitePackages", context.site_packages), ("cwd", context.cwd) + ] if value + ] + [ + # Runtime properties + f"-Pandroid.testInstrumentationRunnerArguments.python{name}={value}" + for name, value in [ + ("Mode", mode), ("Module", module), ("Args", join_command(context.args)) + ] if value ] - hidden_output = [] + if context.verbose >= 2: + args.append("--info") + log("> " + join_command(args)) + try: async with async_process( *args, cwd=TESTBED_DIR, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) as process: while line := (await process.stdout.readline()).decode(*DECODE_ARGS): - # Gradle may take several minutes to install SDK packages, so - # it's worth showing those messages even in non-verbose mode. - if context.verbose or line.startswith('Preparing "Install'): - sys.stdout.write(line) - else: - hidden_output.append(line) + log(line) status = await wait_for(process.wait(), timeout=1) if status == 0: @@ -604,6 +671,10 @@ def package(context): print(f"Wrote {package_path}") +def env(context): + print_env(android_env(getattr(context, "host", None))) + + # Handle SIGTERM the same way as SIGINT. This ensures that if we're terminated # by the buildbot worker, we'll make an attempt to clean up our subprocesses. def install_signal_handler(): @@ -615,36 +686,41 @@ def signal_handler(*args): def parse_args(): parser = argparse.ArgumentParser() - subcommands = parser.add_subparsers(dest="subcommand") + subcommands = parser.add_subparsers(dest="subcommand", required=True) # Subcommands - build = subcommands.add_parser("build", help="Build everything") - configure_build = subcommands.add_parser("configure-build", - help="Run `configure` for the " - "build Python") - make_build = subcommands.add_parser("make-build", - help="Run `make` for the build Python") - configure_host = subcommands.add_parser("configure-host", - help="Run `configure` for Android") - make_host = subcommands.add_parser("make-host", - help="Run `make` for Android") + build = subcommands.add_parser( + "build", help="Run configure-build, make-build, configure-host and " + "make-host") + configure_build = subcommands.add_parser( + "configure-build", help="Run `configure` for the build Python") subcommands.add_parser( - "clean", help="Delete all build and prefix directories") - subcommands.add_parser( - "build-testbed", help="Build the testbed app") - test = subcommands.add_parser( - "test", help="Run the test suite") + "make-build", help="Run `make` for the build Python") + configure_host = subcommands.add_parser( + "configure-host", help="Run `configure` for Android") + make_host = subcommands.add_parser( + "make-host", help="Run `make` for Android") + + subcommands.add_parser("clean", help="Delete all build directories") + subcommands.add_parser("build-testbed", help="Build the testbed app") + test = subcommands.add_parser("test", help="Run the testbed app") package = subcommands.add_parser("package", help="Make a release package") + env = subcommands.add_parser("env", help="Print environment variables") # Common arguments for subcommand in build, configure_build, configure_host: subcommand.add_argument( "--clean", action="store_true", default=False, dest="clean", - help="Delete the relevant build and prefix directories first") - for subcommand in [build, configure_host, make_host, package]: + help="Delete the relevant build directories first") + + host_commands = [build, configure_host, make_host, package] + if in_source_tree: + host_commands.append(env) + for subcommand in host_commands: subcommand.add_argument( "host", metavar="HOST", choices=HOSTS, help="Host triplet: choices=[%(choices)s]") + for subcommand in build, configure_build, configure_host: subcommand.add_argument("args", nargs="*", help="Extra arguments to pass to `configure`") @@ -654,6 +730,7 @@ def parse_args(): "-v", "--verbose", action="count", default=0, help="Show Gradle output, and non-Python logcat messages. " "Use twice to include high-volume messages which are rarely useful.") + device_group = test.add_mutually_exclusive_group(required=True) device_group.add_argument( "--connected", metavar="SERIAL", help="Run on a connected device. " @@ -661,8 +738,24 @@ def parse_args(): device_group.add_argument( "--managed", metavar="NAME", help="Run on a Gradle-managed device. " "These are defined in `managedDevices` in testbed/app/build.gradle.kts.") + + test.add_argument( + "--site-packages", metavar="DIR", type=abspath, + help="Directory to copy as the app's site-packages.") test.add_argument( - "args", nargs="*", help=f"Arguments for `python -m test`. " + "--cwd", metavar="DIR", type=abspath, + help="Directory to copy as the app's working directory.") + + mode_group = test.add_mutually_exclusive_group() + mode_group.add_argument( + "-c", dest="command", help="Execute the given Python code.") + mode_group.add_argument( + "-m", dest="module", help="Execute the module with the given name.") + test.epilog = ( + "If neither -c nor -m are passed, the default is '-m test', which will " + "run Python's own test suite.") + test.add_argument( + "args", nargs="*", help=f"Arguments to add to sys.argv. " f"Separate them from {SCRIPT_NAME}'s own arguments with `--`.") return parser.parse_args() @@ -688,6 +781,7 @@ def main(): "build-testbed": build_testbed, "test": run_testbed, "package": package, + "env": env, } try: @@ -708,14 +802,9 @@ def print_called_process_error(e): if not content.endswith("\n"): stream.write("\n") - # Format the command so it can be copied into a shell. shlex uses single - # quotes, so we surround the whole command with double quotes. - args_joined = ( - e.cmd if isinstance(e.cmd, str) - else " ".join(shlex.quote(str(arg)) for arg in e.cmd) - ) + # shlex uses single quotes, so we surround the command with double quotes. print( - f'Command "{args_joined}" returned exit status {e.returncode}' + f'Command "{join_command(e.cmd)}" returned exit status {e.returncode}' ) diff --git a/Android/testbed/app/build.gradle.kts b/Android/testbed/app/build.gradle.kts index c627cb1b0e0b22..92cffd61f86876 100644 --- a/Android/testbed/app/build.gradle.kts +++ b/Android/testbed/app/build.gradle.kts @@ -85,7 +85,7 @@ android { minSdk = androidEnvFile.useLines { for (line in it) { - """api_level:=(\d+)""".toRegex().find(line)?.let { + """ANDROID_API_LEVEL:=(\d+)""".toRegex().find(line)?.let { return@useLines it.groupValues[1].toInt() } } @@ -205,11 +205,29 @@ androidComponents.onVariants { variant -> into("site-packages") { from("$projectDir/src/main/python") + + val sitePackages = findProperty("python.sitePackages") as String? + if (!sitePackages.isNullOrEmpty()) { + if (!file(sitePackages).exists()) { + throw GradleException("$sitePackages does not exist") + } + from(sitePackages) + } } duplicatesStrategy = DuplicatesStrategy.EXCLUDE exclude("**/__pycache__") } + + into("cwd") { + val cwd = findProperty("python.cwd") as String? + if (!cwd.isNullOrEmpty()) { + if (!file(cwd).exists()) { + throw GradleException("$cwd does not exist") + } + from(cwd) + } + } } } diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt index 0e888ab71d87da..94be52dd2dc870 100644 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt @@ -17,11 +17,11 @@ class PythonSuite { fun testPython() { val start = System.currentTimeMillis() try { - val context = + val status = PythonTestRunner( InstrumentationRegistry.getInstrumentation().targetContext - val args = - InstrumentationRegistry.getArguments().getString("pythonArgs", "") - val status = PythonTestRunner(context).run(args) + ).run( + InstrumentationRegistry.getArguments() + ) assertEquals(0, status) } finally { // Make sure the process lives long enough for the test script to diff --git a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt index c4bf6cbe83d8cd..ef28948486fb52 100644 --- a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt +++ b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt @@ -15,17 +15,29 @@ class MainActivity : AppCompatActivity() { override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) setContentView(R.layout.activity_main) - val status = PythonTestRunner(this).run("-W -uall") + val status = PythonTestRunner(this).run("-m", "test", "-W -uall") findViewById(R.id.tvHello).text = "Exit status $status" } } class PythonTestRunner(val context: Context) { - /** @param args Extra arguments for `python -m test`. - * @return The Python exit status: zero if the tests passed, nonzero if - * they failed. */ - fun run(args: String = "") : Int { + fun run(instrumentationArgs: Bundle) = run( + instrumentationArgs.getString("pythonMode")!!, + instrumentationArgs.getString("pythonModule")!!, + instrumentationArgs.getString("pythonArgs") ?: "", + ) + + /** Run Python. + * + * @param mode Either "-c" or "-m". + * @param module Python statements for "-c" mode, or a module name for + * "-m" mode. + * @param args Arguments to add to sys.argv. Will be parsed by `shlex.split`. + * @return The Python exit status: zero on success, nonzero on failure. */ + fun run(mode: String, module: String, args: String) : Int { + Os.setenv("PYTHON_MODE", mode, true) + Os.setenv("PYTHON_MODULE", module, true) Os.setenv("PYTHON_ARGS", args, true) // Python needs this variable to help it find the temporary directory, @@ -36,8 +48,9 @@ class PythonTestRunner(val context: Context) { System.loadLibrary("main_activity") redirectStdioToLogcat() - // The main module is in src/main/python/main.py. - return runPython(pythonHome.toString(), "main") + // The main module is in src/main/python. We don't simply call it + // "main", as that could clash with third-party test code. + return runPython(pythonHome.toString(), "android_testbed_main") } private fun extractAssets() : File { diff --git a/Android/testbed/app/src/main/python/main.py b/Android/testbed/app/src/main/python/android_testbed_main.py similarity index 68% rename from Android/testbed/app/src/main/python/main.py rename to Android/testbed/app/src/main/python/android_testbed_main.py index d6941b14412fcc..31b8e5343a8449 100644 --- a/Android/testbed/app/src/main/python/main.py +++ b/Android/testbed/app/src/main/python/android_testbed_main.py @@ -26,7 +26,23 @@ # test_signals in test_threadsignals.py. signal.pthread_sigmask(signal.SIG_UNBLOCK, [signal.SIGUSR1]) +mode = os.environ["PYTHON_MODE"] +module = os.environ["PYTHON_MODULE"] sys.argv[1:] = shlex.split(os.environ["PYTHON_ARGS"]) -# The test module will call sys.exit to indicate whether the tests passed. -runpy.run_module("test") +cwd = f"{sys.prefix}/cwd" +if not os.path.exists(cwd): + # Empty directories are lost in the asset packing/unpacking process. + os.mkdir(cwd) +os.chdir(cwd) + +if mode == "-c": + # In -c mode, sys.path starts with an empty string, which means whatever the current + # working directory is at the moment of each import. + sys.path.insert(0, "") + exec(module, {}) +elif mode == "-m": + sys.path.insert(0, os.getcwd()) + runpy.run_module(module, run_name="__main__", alter_sys=True) +else: + raise ValueError(f"unknown mode: {mode}") diff --git a/Android/testbed/build.gradle.kts b/Android/testbed/build.gradle.kts index 4d1d6f87594da3..451517b3f1aeab 100644 --- a/Android/testbed/build.gradle.kts +++ b/Android/testbed/build.gradle.kts @@ -1,5 +1,5 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. plugins { - id("com.android.application") version "8.6.1" apply false + id("com.android.application") version "8.10.0" apply false id("org.jetbrains.kotlin.android") version "1.9.22" apply false } diff --git a/Android/testbed/gradle/wrapper/gradle-wrapper.properties b/Android/testbed/gradle/wrapper/gradle-wrapper.properties index 36529c896426b0..5d42fbae084da1 100644 --- a/Android/testbed/gradle/wrapper/gradle-wrapper.properties +++ b/Android/testbed/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Mon Feb 19 20:29:06 GMT 2024 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/Doc/using/android.rst b/Doc/using/android.rst index 65bf23dc994856..cb762310328f1c 100644 --- a/Doc/using/android.rst +++ b/Doc/using/android.rst @@ -63,3 +63,12 @@ link to the relevant file. * Add code to your app to :source:`start Python in embedded mode `. This will need to be C code called via JNI. + +Building a Python package for Android +------------------------------------- + +Python packages can be built for Android as wheels and released on PyPI. The +recommended tool for doing this is `cibuildwheel +`__, which automates +all the details of setting up a cross-compilation environment, building the +wheel, and testing it on an emulator. From 3d396ab7591d544ac8bc1fb49615b4e867ca1c83 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Jun 2025 11:17:03 +0200 Subject: [PATCH 482/989] gh-135124: Change stdout errors in regrtest worker process (#135138) Set sys.stdout encoder error handler to backslashreplace in regrtest workers to avoid UnicodeEncodeError when printing a traceback or any other non-encodable character. Move the code from the Regrtest class to setup_process(). Call setup_process() earlier, before displaying regrtest headers. --- Lib/test/libregrtest/main.py | 7 +----- Lib/test/libregrtest/setup.py | 9 +++++++ Lib/test/test_regrtest.py | 47 ++++++++++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 713cbedb299706..0d9c059a93872d 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -543,8 +543,6 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int: self.first_runtests = runtests self.logger.set_tests(runtests) - setup_process() - if (runtests.hunt_refleak is not None) and (not self.num_workers): # gh-109739: WindowsLoadTracker thread interferes with refleak check use_load_tracker = False @@ -721,10 +719,7 @@ def _add_python_opts(self) -> None: self._execute_python(cmd, environ) def _init(self): - # Set sys.stdout encoder error handler to backslashreplace, - # similar to sys.stderr error handler, to avoid UnicodeEncodeError - # when printing a traceback or any other non-encodable character. - sys.stdout.reconfigure(errors="backslashreplace") + setup_process() if self.junit_filename and not os.path.isabs(self.junit_filename): self.junit_filename = os.path.abspath(self.junit_filename) diff --git a/Lib/test/libregrtest/setup.py b/Lib/test/libregrtest/setup.py index c3d1f60a400665..9bfc414cd615c8 100644 --- a/Lib/test/libregrtest/setup.py +++ b/Lib/test/libregrtest/setup.py @@ -1,5 +1,6 @@ import faulthandler import gc +import io import os import random import signal @@ -52,6 +53,14 @@ def setup_process() -> None: support.record_original_stdout(sys.stdout) + # Set sys.stdout encoder error handler to backslashreplace, + # similar to sys.stderr error handler, to avoid UnicodeEncodeError + # when printing a traceback or any other non-encodable character. + # + # Use an assertion to fix mypy error. + assert isinstance(sys.stdout, io.TextIOWrapper) + sys.stdout.reconfigure(errors="backslashreplace") + # Some times __path__ and __file__ are not absolute (e.g. while running from # Lib/) and, if we change the CWD to run the tests in a temporary dir, some # imports might fail. This affects only the modules imported before os.chdir(). diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index f3ac301686b9fc..a43d2678ebd3be 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -768,13 +768,16 @@ def run_command(self, args, input=None, exitcode=0, **kw): self.fail(msg) return proc - def run_python(self, args, **kw): + def run_python(self, args, isolated=True, **kw): extraargs = [] if 'uops' in sys._xoptions: # Pass -X uops along extraargs.extend(['-X', 'uops']) - args = [sys.executable, *extraargs, '-X', 'faulthandler', '-I', *args] - proc = self.run_command(args, **kw) + cmd = [sys.executable, *extraargs, '-X', 'faulthandler'] + if isolated: + cmd.append('-I') + cmd.extend(args) + proc = self.run_command(cmd, **kw) return proc.stdout @@ -831,8 +834,8 @@ def check_output(self, output): self.check_executed_tests(output, self.tests, randomize=True, stats=len(self.tests)) - def run_tests(self, args, env=None): - output = self.run_python(args, env=env) + def run_tests(self, args, env=None, isolated=True): + output = self.run_python(args, env=env, isolated=isolated) self.check_output(output) def test_script_regrtest(self): @@ -2276,7 +2279,6 @@ def test_pass(self): def test_xml(self): code = textwrap.dedent(r""" import unittest - from test import support class VerboseTests(unittest.TestCase): def test_failed(self): @@ -2311,6 +2313,39 @@ def test_failed(self): for out in testcase.iter('system-out'): self.assertEqual(out.text, r"abc \x1b def") + def test_nonascii(self): + code = textwrap.dedent(r""" + import unittest + + class NonASCIITests(unittest.TestCase): + def test_docstring(self): + '''docstring:\u20ac''' + + def test_subtest(self): + with self.subTest(param='subtest:\u20ac'): + pass + + def test_skip(self): + self.skipTest('skipped:\u20ac') + """) + testname = self.create_test(code=code) + + env = dict(os.environ) + env['PYTHONIOENCODING'] = 'ascii' + + def check(output): + self.check_executed_tests(output, testname, stats=TestStats(3, 0, 1)) + self.assertIn(r'docstring:\u20ac', output) + self.assertIn(r'skipped:\u20ac', output) + + # Run sequentially + output = self.run_tests('-v', testname, env=env, isolated=False) + check(output) + + # Run in parallel + output = self.run_tests('-j1', '-v', testname, env=env, isolated=False) + check(output) + class TestUtils(unittest.TestCase): def test_format_duration(self): From 4b44b3409ac026e7f13054a3daa18ab7ee14d85c Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Thu, 5 Jun 2025 04:31:49 -0700 Subject: [PATCH 483/989] gh-134938: Add set_pledged_input_size() to ZstdCompressor (GH-135010) --- Doc/library/compression.zstd.rst | 35 ++++++++- Lib/test/test_zstd.py | 109 ++++++++++++++++++++++++++++ Modules/_zstd/_zstdmodule.c | 3 + Modules/_zstd/_zstdmodule.h | 1 + Modules/_zstd/clinic/compressor.c.h | 41 ++++++++++- Modules/_zstd/compressor.c | 98 +++++++++++++++++++++++++ 6 files changed, 285 insertions(+), 2 deletions(-) diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst index 35bcbc2bfd8eac..57ad8e3377fc67 100644 --- a/Doc/library/compression.zstd.rst +++ b/Doc/library/compression.zstd.rst @@ -247,6 +247,27 @@ Compressing and decompressing data in memory The *mode* argument is a :class:`ZstdCompressor` attribute, either :attr:`~.FLUSH_BLOCK`, or :attr:`~.FLUSH_FRAME`. + .. method:: set_pledged_input_size(size) + + Specify the amount of uncompressed data *size* that will be provided for + the next frame. *size* will be written into the frame header of the next + frame unless :attr:`CompressionParameter.content_size_flag` is ``False`` + or ``0``. A size of ``0`` means that the frame is empty. If *size* is + ``None``, the frame header will omit the frame size. Frames that include + the uncompressed data size require less memory to decompress, especially + at higher compression levels. + + If :attr:`last_mode` is not :attr:`FLUSH_FRAME`, a + :exc:`ValueError` is raised as the compressor is not at the start of + a frame. If the pledged size does not match the actual size of data + provided to :meth:`.compress`, future calls to :meth:`!compress` or + :meth:`flush` may raise :exc:`ZstdError` and the last chunk of data may + be lost. + + After :meth:`flush` or :meth:`.compress` are called with mode + :attr:`FLUSH_FRAME`, the next frame will not include the frame size into + the header unless :meth:`!set_pledged_input_size` is called again. + .. attribute:: CONTINUE Collect more data for compression, which may or may not generate output @@ -266,6 +287,13 @@ Compressing and decompressing data in memory :meth:`~.compress` will be written into a new frame and *cannot* reference past data. + .. attribute:: last_mode + + The last mode passed to either :meth:`~.compress` or :meth:`~.flush`. + The value can be one of :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, or + :attr:`~.FLUSH_FRAME`. The initial value is :attr:`~.FLUSH_FRAME`, + signifying that the compressor is at the start of a new frame. + .. class:: ZstdDecompressor(zstd_dict=None, options=None) @@ -620,12 +648,17 @@ Advanced parameter control Write the size of the data to be compressed into the Zstandard frame header when known prior to compressing. - This flag only takes effect under the following two scenarios: + This flag only takes effect under the following scenarios: * Calling :func:`compress` for one-shot compression * Providing all of the data to be compressed in the frame in a single :meth:`ZstdCompressor.compress` call, with the :attr:`ZstdCompressor.FLUSH_FRAME` mode. + * Calling :meth:`ZstdCompressor.set_pledged_input_size` with the exact + amount of data that will be provided to the compressor prior to any + calls to :meth:`ZstdCompressor.compress` for the current frame. + :meth:`!ZstdCompressor.set_pledged_input_size` must be called for each + new frame. All other compression calls may not write the size information into the frame header. diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index e475d9346b9594..14a09a886046f7 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -395,6 +395,115 @@ def test_compress_empty(self): c = ZstdCompressor() self.assertNotEqual(c.compress(b'', c.FLUSH_FRAME), b'') + def test_set_pledged_input_size(self): + DAT = DECOMPRESSED_100_PLUS_32KB + CHUNK_SIZE = len(DAT) // 3 + + # wrong value + c = ZstdCompressor() + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(-300) + # overflow + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64) + # ZSTD_CONTENTSIZE_ERROR is invalid + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64-2) + # ZSTD_CONTENTSIZE_UNKNOWN should use None + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64-1) + + # check valid values are settable + c.set_pledged_input_size(2**63) + c.set_pledged_input_size(2**64-3) + + # check that zero means empty frame + c = ZstdCompressor(level=1) + c.set_pledged_input_size(0) + c.compress(b'') + dat = c.flush() + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, 0) + + + # wrong mode + c = ZstdCompressor(level=1) + c.compress(b'123456') + self.assertEqual(c.last_mode, c.CONTINUE) + with self.assertRaisesRegex(ValueError, + r'last_mode == FLUSH_FRAME'): + c.set_pledged_input_size(300) + + # None value + c = ZstdCompressor(level=1) + c.set_pledged_input_size(None) + dat = c.compress(DAT) + c.flush() + + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, None) + + # correct value + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)) + + chunks = [] + posi = 0 + while posi < len(DAT): + dat = c.compress(DAT[posi:posi+CHUNK_SIZE]) + posi += CHUNK_SIZE + chunks.append(dat) + + dat = c.flush() + chunks.append(dat) + chunks = b''.join(chunks) + + ret = get_frame_info(chunks) + self.assertEqual(ret.decompressed_size, len(DAT)) + self.assertEqual(decompress(chunks), DAT) + + c.set_pledged_input_size(len(DAT)) # the second frame + dat = c.compress(DAT) + c.flush() + + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, len(DAT)) + self.assertEqual(decompress(dat), DAT) + + # not enough data + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)+1) + + for start in range(0, len(DAT), CHUNK_SIZE): + end = min(start+CHUNK_SIZE, len(DAT)) + _dat = c.compress(DAT[start:end]) + + with self.assertRaises(ZstdError): + c.flush() + + # too much data + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)) + + for start in range(0, len(DAT), CHUNK_SIZE): + end = min(start+CHUNK_SIZE, len(DAT)) + _dat = c.compress(DAT[start:end]) + + with self.assertRaises(ZstdError): + c.compress(b'extra', ZstdCompressor.FLUSH_FRAME) + + # content size not set if content_size_flag == 0 + c = ZstdCompressor(options={CompressionParameter.content_size_flag: 0}) + c.set_pledged_input_size(10) + dat1 = c.compress(b"hello") + dat2 = c.compress(b"world") + dat3 = c.flush() + frame_data = get_frame_info(dat1 + dat2 + dat3) + self.assertIsNone(frame_data.decompressed_size) + + class DecompressorTestCase(unittest.TestCase): def test_simple_decompress_bad_args(self): diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index b0e50f873f4ca6..d75c0779474a82 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -72,6 +72,9 @@ set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) case ERR_COMPRESS: msg = "Unable to compress Zstandard data: %s"; break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; case ERR_LOAD_D_DICT: msg = "Unable to load Zstandard dictionary or prefix for " diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index c73f15b3c5299b..4e8f708f2232c7 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -27,6 +27,7 @@ typedef struct { typedef enum { ERR_DECOMPRESS, ERR_COMPRESS, + ERR_SET_PLEDGED_INPUT_SIZE, ERR_LOAD_D_DICT, ERR_LOAD_C_DICT, diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index f69161b590e5b7..4f8d93fd9e867c 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -252,4 +252,43 @@ _zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=ee2d1dc298de790c input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__, +"set_pledged_input_size($self, size, /)\n" +"--\n" +"\n" +"Set the uncompressed content size to be written into the frame header.\n" +"\n" +" size\n" +" The size of the uncompressed data to be provided to the compressor.\n" +"\n" +"This method can be used to ensure the header of the frame about to be written\n" +"includes the size of the data, unless the CompressionParameter.content_size_flag\n" +"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n" +"\n" +"It is important to ensure that the pledged data size matches the actual data\n" +"size. If they do not match the compressed output data may be corrupted and the\n" +"final chunk written may be lost."); + +#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \ + {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__}, + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size); + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned long long size; + + if (!zstd_contentsize_converter(arg, &size)) { + goto exit; + } + return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size); + +exit: + return return_value; +} +/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index e1217635f60cb0..bc9e6eff89af68 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -45,6 +45,52 @@ typedef struct { #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) +/*[python input] + +class zstd_contentsize_converter(CConverter): + type = 'unsigned long long' + converter = 'zstd_contentsize_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/ + + +static int +zstd_contentsize_converter(PyObject *size, unsigned long long *p) +{ + // None means the user indicates the size is unknown. + if (size == Py_None) { + *p = ZSTD_CONTENTSIZE_UNKNOWN; + } + else { + /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1 + ZSTD_CONTENTSIZE_ERROR is 0ULL - 2 + Users should only pass values < ZSTD_CONTENTSIZE_ERROR */ + unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size); + /* Here we check for (unsigned long long)-1 as a sign of an error in + PyLong_AsUnsignedLongLong */ + if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) { + *p = ZSTD_CONTENTSIZE_ERROR; + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + return 0; + } + if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) { + *p = ZSTD_CONTENTSIZE_ERROR; + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + *p = pledged_size; + } + return 1; +} + #include "clinic/compressor.c.h" static int @@ -643,9 +689,61 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) return ret; } + +/*[clinic input] +_zstd.ZstdCompressor.set_pledged_input_size + + size: zstd_contentsize + The size of the uncompressed data to be provided to the compressor. + / + +Set the uncompressed content size to be written into the frame header. + +This method can be used to ensure the header of the frame about to be written +includes the size of the data, unless the CompressionParameter.content_size_flag +is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised. + +It is important to ensure that the pledged data size matches the actual data +size. If they do not match the compressed output data may be corrupted and the +final chunk written may be lost. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size) +/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/ +{ + // Error occured while converting argument, should be unreachable + assert(size != ZSTD_CONTENTSIZE_ERROR); + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Check the current mode */ + if (self->last_mode != ZSTD_e_end) { + PyErr_SetString(PyExc_ValueError, + "set_pledged_input_size() method must be called " + "when last_mode == FLUSH_FRAME"); + PyMutex_Unlock(&self->lock); + return NULL; + } + + /* Set pledged content size */ + size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size); + PyMutex_Unlock(&self->lock); + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); + return NULL; + } + + Py_RETURN_NONE; +} + static PyMethodDef ZstdCompressor_methods[] = { _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF + _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF {NULL, NULL} }; From 9258f3da9175134d03f2c8c7c7eed223802ad945 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Jun 2025 14:43:47 +0200 Subject: [PATCH 484/989] gh-134989: Fix Py_RETURN_NONE in the limited C API (GH-135165) Fix Py_RETURN_NONE, Py_RETURN_TRUE and Py_RETURN_FALSE macros in the limited C API 3.11 and older: Don't treat Py_None, Py_True and Py_False as immortal. --- Include/boolobject.h | 13 ++++++++++--- Include/object.h | 9 +++++++-- .../2025-06-05-11-06-07.gh-issue-134989.74p4ud.rst | 3 +++ 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-06-05-11-06-07.gh-issue-134989.74p4ud.rst diff --git a/Include/boolobject.h b/Include/boolobject.h index 3037e61bbf6d0c..b56e2baecaa36c 100644 --- a/Include/boolobject.h +++ b/Include/boolobject.h @@ -34,9 +34,16 @@ PyAPI_FUNC(int) Py_IsTrue(PyObject *x); PyAPI_FUNC(int) Py_IsFalse(PyObject *x); #define Py_IsFalse(x) Py_Is((x), Py_False) -/* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_True -#define Py_RETURN_FALSE return Py_False +/* Macros for returning Py_True or Py_False, respectively. + * Only treat Py_True and Py_False as immortal in the limited C API 3.12 + * and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_TRUE return Py_NewRef(Py_True) +# define Py_RETURN_FALSE return Py_NewRef(Py_False) +#else +# define Py_RETURN_TRUE return Py_True +# define Py_RETURN_FALSE return Py_False +#endif /* Function to return a bool from a C long */ PyAPI_FUNC(PyObject *) PyBool_FromLong(long); diff --git a/Include/object.h b/Include/object.h index 994cac1ad17501..42aed614d4ad8e 100644 --- a/Include/object.h +++ b/Include/object.h @@ -660,8 +660,13 @@ PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ PyAPI_FUNC(int) Py_IsNone(PyObject *x); #define Py_IsNone(x) Py_Is((x), Py_None) -/* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_None +/* Macro for returning Py_None from a function. + * Only treat Py_None as immortal in the limited C API 3.12 and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_NONE return Py_NewRef(Py_None) +#else +# define Py_RETURN_NONE return Py_None +#endif /* Py_NotImplemented is a singleton used to signal that an operation is diff --git a/Misc/NEWS.d/next/C_API/2025-06-05-11-06-07.gh-issue-134989.74p4ud.rst b/Misc/NEWS.d/next/C_API/2025-06-05-11-06-07.gh-issue-134989.74p4ud.rst new file mode 100644 index 00000000000000..844e9a666640c6 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-06-05-11-06-07.gh-issue-134989.74p4ud.rst @@ -0,0 +1,3 @@ +Fix ``Py_RETURN_NONE``, ``Py_RETURN_TRUE`` and ``Py_RETURN_FALSE`` macros in +the limited C API 3.11 and older: don't treat ``Py_None``, ``Py_True`` and +``Py_False`` as immortal. Patch by Victor Stinner. From 8919cb4ad9fb18832a4bc9d5bbea305e9518c7ab Mon Sep 17 00:00:00 2001 From: rialbat <47256826+rialbat@users.noreply.github.com> Date: Thu, 5 Jun 2025 18:08:48 +0300 Subject: [PATCH 485/989] gh-135161: Remove redundant NULL check for 'exc' after dereference in ceval.c (#135162) --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 7aec196cb85704..5ea837e1a6ef31 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3190,7 +3190,7 @@ _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwarg else if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { PyObject *exc = _PyErr_GetRaisedException(tstate); PyObject *args = PyException_GetArgs(exc); - if (exc && PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1) { + if (PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1) { _PyErr_Clear(tstate); PyObject *funcstr = _PyObject_FunctionStr(func); if (funcstr != NULL) { From 1b55e12766d007aea9fcd0966e29ce220b67d28e Mon Sep 17 00:00:00 2001 From: Weipeng Hong Date: Fri, 6 Jun 2025 00:00:07 +0800 Subject: [PATCH 486/989] gh-135166: Fix exception type expected by test.test_zstd (GH-135167) --- Lib/test/test_zstd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 14a09a886046f7..d4c28aed38ef90 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -293,11 +293,11 @@ def test_compress_parameters(self): # zstd lib doesn't support MT compression if not SUPPORT_MULTITHREADING: - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.nb_workers:4}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.job_size:4}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.overlap_log:4}) # out of bounds error msg From d9cad074d52fe31327429fd81e4d2eeea3dbe35b Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 5 Jun 2025 10:28:11 -0700 Subject: [PATCH 487/989] gh-134155: fix AttributeError in email._header_value_parser.get_address (#134194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Append the defect to defects instead of to the parse tree. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/email/_header_value_parser.py | 4 +- .../test_email/test__header_value_parser.py | 45 +++++++++++++++++++ ...-05-18-23-46-21.gh-issue-134152.30HwbX.rst | 1 + 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index f11fa83d45ed2d..91243378dc0441 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1575,7 +1575,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1594,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index fd4ac2c404ce47..179e236ecdfd7f 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2491,6 +2491,38 @@ def test_get_address_quoted_strings_in_atom_list(self): self.assertEqual(address.all_mailboxes[0].domain, 'example.com') self.assertEqual(address.all_mailboxes[0].addr_spec, '"example example"@example.com') + def test_get_address_with_invalid_domain(self): + address = self._test_get_x(parser.get_address, + '', + '', + [errors.InvalidHeaderDefect, # missing trailing '>' on angle-addr + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[]') + self.assertEqual(address.all_mailboxes[0].local_part, 'T') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') + self.assertEqual(address[0].token_type, 'invalid-mailbox') + + address = self._test_get_x(parser.get_address, + '!an??:=m==fr2@[C', + '!an??:=m==fr2@[C];', + '!an??:=m==fr2@[C];', + [errors.InvalidHeaderDefect, # end of header in group + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[C]') + self.assertEqual(address.all_mailboxes[0].local_part, '=m==fr2') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') + self.assertEqual(address[0].token_type, 'group') # get_address_list @@ -2765,6 +2797,19 @@ def test_parse_valid_message_id(self): ) self.assertEqual(message_id.token_type, 'message-id') + def test_parse_message_id_with_invalid_domain(self): + message_id = self._test_parse_x( + parser.parse_message_id, + "", + "", + [errors.ObsoleteHeaderDefect] + [errors.InvalidHeaderDefect] * 2, + [], + ) + self.assertEqual(message_id.token_type, 'message-id') + self.assertEqual(str(message_id.all_defects[-1]), + "end of input inside domain-literal") + def test_parse_message_id_with_remaining(self): message_id = self._test_parse_x( parser.parse_message_id, diff --git a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst new file mode 100644 index 00000000000000..911a4a59ea6079 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst @@ -0,0 +1 @@ +:mod:`email`: Fix parsing of email message ID with invalid domain. From b90ecea9e6b33dae360ed7eb2c32598f98444c4d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Jun 2025 18:53:57 +0100 Subject: [PATCH 488/989] GH-132554: Fix tier2 `FOR_ITER` implementation and optimizations (GH-135137) --- Include/internal/pycore_ceval.h | 3 +- Include/internal/pycore_stackref.h | 42 +++++++++++-- Lib/test/test_capi/test_opt.py | 11 ++++ Python/bytecodes.c | 97 +++++++----------------------- Python/ceval.c | 28 ++++++++- Python/executor_cases.c.h | 25 ++------ Python/generated_cases.c.h | 91 +++++++--------------------- Python/optimizer_bytecodes.c | 11 ++++ Python/optimizer_cases.c.h | 12 +++- Python/stackrefs.c | 4 ++ Tools/cases_generator/analyzer.py | 2 + Tools/cases_generator/stack.py | 2 +- 12 files changed, 155 insertions(+), 173 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 092feeb40b04a3..239177deb4a948 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -353,7 +353,8 @@ PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyS extern int _PyRunRemoteDebugger(PyThreadState *tstate); #endif -_PyStackRef _PyForIter_NextWithIndex(PyObject *seq, _PyStackRef index); +PyAPI_FUNC(_PyStackRef) +_PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef *index_ptr); #ifdef __cplusplus } diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index f2ecc30b053568..8791476725289c 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -62,14 +62,15 @@ PyAPI_FUNC(void) _Py_stackref_record_borrow(_PyStackRef ref, const char *filenam extern void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _PyStackRef ref); static const _PyStackRef PyStackRef_NULL = { .index = 0 }; +static const _PyStackRef PyStackRef_ERROR = { .index = 2 }; // Use the first 3 even numbers for None, True and False. // Odd numbers are reserved for (tagged) integers -#define PyStackRef_None ((_PyStackRef){ .index = 2 } ) -#define PyStackRef_False ((_PyStackRef){ .index = 4 }) -#define PyStackRef_True ((_PyStackRef){ .index = 6 }) +#define PyStackRef_None ((_PyStackRef){ .index = 4 } ) +#define PyStackRef_False ((_PyStackRef){ .index = 6 }) +#define PyStackRef_True ((_PyStackRef){ .index = 8 }) -#define INITIAL_STACKREF_INDEX 8 +#define INITIAL_STACKREF_INDEX 10 static inline int PyStackRef_IsNull(_PyStackRef ref) @@ -77,6 +78,19 @@ PyStackRef_IsNull(_PyStackRef ref) return ref.index == 0; } +static inline bool +PyStackRef_IsError(_PyStackRef ref) +{ + return ref.index == 2; +} + +static inline bool +PyStackRef_IsValid(_PyStackRef ref) +{ + /* Invalid values are ERROR and NULL */ + return !PyStackRef_IsError(ref) && !PyStackRef_IsNull(ref); +} + static inline int PyStackRef_IsTrue(_PyStackRef ref) { @@ -104,6 +118,7 @@ PyStackRef_IsTaggedInt(_PyStackRef ref) static inline PyObject * _PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); assert(!PyStackRef_IsTaggedInt(ref)); _Py_stackref_record_borrow(ref, filename, linenumber); return _Py_stackref_get_object(ref); @@ -155,6 +170,7 @@ _PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber) static inline void _PyStackRef_XCLOSE(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); if (PyStackRef_IsNull(ref)) { return; } @@ -165,6 +181,7 @@ _PyStackRef_XCLOSE(_PyStackRef ref, const char *filename, int linenumber) static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); if (PyStackRef_IsTaggedInt(ref)) { return ref; } @@ -241,9 +258,25 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #else #define Py_INT_TAG 3 +#define Py_TAG_INVALID 2 #define Py_TAG_REFCNT 1 #define Py_TAG_BITS 3 +static const _PyStackRef PyStackRef_ERROR = { .bits = Py_TAG_INVALID }; + +static inline bool +PyStackRef_IsError(_PyStackRef ref) +{ + return ref.bits == Py_TAG_INVALID; +} + +static inline bool +PyStackRef_IsValid(_PyStackRef ref) +{ + /* Invalid values are ERROR and NULL */ + return ref.bits >= Py_INT_TAG; +} + static inline bool PyStackRef_IsTaggedInt(_PyStackRef i) { @@ -284,6 +317,7 @@ PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; + #define PyStackRef_IsNull(stackref) ((stackref).bits == PyStackRef_NULL.bits) #define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_DEFERRED }) #define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_DEFERRED }) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index cb6eae484149ee..a292ebcc7f4aed 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1183,6 +1183,17 @@ def testfunc(n): self.assertIsNotNone(ex) self.assertIn("_RETURN_GENERATOR", get_opnames(ex)) + def test_for_iter(self): + def testfunc(n): + t = 0 + for i in set(range(n)): + t += i + return t + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * (TIER2_THRESHOLD - 1) // 2) + self.assertIsNotNone(ex) + self.assertIn("_FOR_ITER_TIER_TWO", get_opnames(ex)) + @unittest.skip("Tracing into generators currently isn't supported.") def test_for_iter_gen(self): def gen(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f02e32fd1d312a..c4b13da5db41d8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3125,100 +3125,49 @@ dummy_func( } replaced op(_FOR_ITER, (iter, null_or_index -- iter, null_or_index, next)) { - /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (PyStackRef_IsTaggedInt(null_or_index)) { - next = _PyForIter_NextWithIndex(iter_o, null_or_index); - if (PyStackRef_IsNull(next)) { - JUMPBY(oparg + 1); - DISPATCH(); - } - null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); - } - else { - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Jump forward oparg, then skip following END_FOR */ - JUMPBY(oparg + 1); - DISPATCH(); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } - next = PyStackRef_FromPyObjectSteal(next_o); + // Jump forward by oparg and skip the following END_FOR + JUMPBY(oparg + 1); + DISPATCH(); } + next = item; } op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { - /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - EXIT_IF(!PyStackRef_IsNull(null_or_index)); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); - _PyErr_Clear(tstate); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } /* iterator ended normally */ /* The translator sets the deopt target just past the matching END_FOR */ EXIT_IF(true); } - next = PyStackRef_FromPyObjectSteal(next_o); - // Common case: no jump, leave it to the code generator + next = item; } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1, iter, null_or_index -- iter, null_or_index, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (PyStackRef_IsTaggedInt(null_or_index)) { - next = _PyForIter_NextWithIndex(iter_o, null_or_index); - if (PyStackRef_IsNull(next)) { - JUMPBY(oparg + 1); - DISPATCH(); - } - null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Skip END_FOR */ - JUMPBY(oparg + 1); - DISPATCH(); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } + // Jump forward by oparg and skip the following END_FOR + JUMPBY(oparg + 1); + DISPATCH(); } + next = item; + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } - op(_ITER_CHECK_LIST, (iter, null_or_index -- iter, null_or_index)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); EXIT_IF(Py_TYPE(iter_o) != &PyList_Type); diff --git a/Python/ceval.c b/Python/ceval.c index 5ea837e1a6ef31..4cfe4bb88f4e48 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3439,8 +3439,8 @@ _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *na return value; } -_PyStackRef -_PyForIter_NextWithIndex(PyObject *seq, _PyStackRef index) +static _PyStackRef +foriter_next(PyObject *seq, _PyStackRef index) { assert(PyStackRef_IsTaggedInt(index)); assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq)); @@ -3459,6 +3459,30 @@ _PyForIter_NextWithIndex(PyObject *seq, _PyStackRef index) return PyStackRef_FromPyObjectSteal(item); } +_PyStackRef _PyForIter_VirtualIteratorNext(PyThreadState* tstate, _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef* index_ptr) +{ + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + _PyStackRef index = *index_ptr; + if (PyStackRef_IsTaggedInt(index)) { + *index_ptr = PyStackRef_IncrementTaggedIntNoOverflow(index); + return foriter_next(iter_o, index); + } + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + if (next_o == NULL) { + if (_PyErr_Occurred(tstate)) { + if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); + _PyErr_Clear(tstate); + } + else { + return PyStackRef_ERROR; + } + } + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectSteal(next_o); +} + /* Check if a 'cls' provides the given special method. */ static inline int type_has_special_method(PyTypeObject *cls, PyObject *name) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 35b29940cb4a15..d19605169d5e55 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4268,33 +4268,20 @@ _PyStackRef next; null_or_index = stack_pointer[-1]; iter = stack_pointer[-2]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (!PyStackRef_IsNull(null_or_index)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_ERROR(); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_ERROR(); } if (true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } } - next = PyStackRef_FromPyObjectSteal(next_o); + next = item; + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e3cd3b71a1de08..c8825df3ade5a5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5753,41 +5753,18 @@ } // _FOR_ITER { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (PyStackRef_IsTaggedInt(null_or_index)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - next = _PyForIter_NextWithIndex(iter_o, null_or_index); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (PyStackRef_IsNull(next)) { - JUMPBY(oparg + 1); - DISPATCH(); - } - null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); - } - else { - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - JUMPBY(oparg + 1); - DISPATCH(); + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_LABEL(error); } - next = PyStackRef_FromPyObjectSteal(next_o); + JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; + DISPATCH(); } + next = item; } stack_pointer[-1] = null_or_index; stack_pointer[0] = next; @@ -7059,45 +7036,19 @@ /* Skip 1 cache entry */ null_or_index = stack_pointer[-1]; iter = stack_pointer[-2]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (PyStackRef_IsTaggedInt(null_or_index)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - next = _PyForIter_NextWithIndex(iter_o, null_or_index); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (PyStackRef_IsNull(next)) { - JUMPBY(oparg + 1); - DISPATCH(); - } - null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - JUMPBY(oparg + 1); - DISPATCH(); + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_LABEL(error); } + JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; + DISPATCH(); } + next = item; + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e1209209660f92..b4220e2c627ecb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -840,6 +840,17 @@ dummy_func(void) { value = sym_new_unknown(ctx); } + op(_GET_ITER, (iterable -- iter, index_or_null)) { + if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) { + iter = iterable; + index_or_null = sym_new_not_null(ctx); + } + else { + iter = sym_new_not_null(ctx); + index_or_null = sym_new_unknown(ctx); + } + } + op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame: _Py_UOpsAbstractFrame*)) { gen_frame = NULL; /* We are about to hit the end of the trace */ diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index db86edcc7859b5..960c683800455e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1562,10 +1562,18 @@ } case _GET_ITER: { + JitOptSymbol *iterable; JitOptSymbol *iter; JitOptSymbol *index_or_null; - iter = sym_new_not_null(ctx); - index_or_null = sym_new_not_null(ctx); + iterable = stack_pointer[-1]; + if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) { + iter = iterable; + index_or_null = sym_new_not_null(ctx); + } + else { + iter = sym_new_not_null(ctx); + index_or_null = sym_new_unknown(ctx); + } stack_pointer[-1] = iter; stack_pointer[0] = index_or_null; stack_pointer += 1; diff --git a/Python/stackrefs.c b/Python/stackrefs.c index b2a1369031ad2b..ecc0012ef17b39 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -40,6 +40,7 @@ make_table_entry(PyObject *obj, const char *filename, int linenumber) PyObject * _Py_stackref_get_object(_PyStackRef ref) { + assert(!PyStackRef_IsError(ref)); if (ref.index == 0) { return NULL; } @@ -64,6 +65,7 @@ PyStackRef_Is(_PyStackRef a, _PyStackRef b) PyObject * _Py_stackref_close(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); PyInterpreterState *interp = PyInterpreterState_Get(); if (ref.index >= interp->next_stackref) { _Py_FatalErrorFormat(__func__, "Invalid StackRef with ID %" PRIu64 " at %s:%d\n", (void *)ref.index, filename, linenumber); @@ -128,6 +130,7 @@ _Py_stackref_create(PyObject *obj, const char *filename, int linenumber) void _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); if (ref.index < INITIAL_STACKREF_INDEX) { return; } @@ -152,6 +155,7 @@ _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _PyStackRef ref) { + assert(!PyStackRef_IsError(ref)); assert(ref.index < INITIAL_STACKREF_INDEX); TableEntry *entry = make_table_entry(obj, "builtin-object", 0); if (entry == NULL) { diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 3070559db8ae57..1447f365336d82 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -681,6 +681,8 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_UntagInt", "PyStackRef_IncrementTaggedIntNoOverflow", "PyStackRef_IsNullOrInt", + "PyStackRef_IsError", + "PyStackRef_IsValid", ) diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6b681775f48c81..df168afa88888c 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -496,7 +496,7 @@ def _push_defined_outputs(self) -> None: f"Expected '{undefined}' to be defined before '{out.name}'" else: undefined = out.name - while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[0]): + while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[self.peeks]): out = self.outputs.pop(self.peeks) self.stack.push(out) From a7d41e8aab5211f4ed7f636c41d63adcab0affba Mon Sep 17 00:00:00 2001 From: Weipeng Hong Date: Fri, 6 Jun 2025 08:44:25 +0800 Subject: [PATCH 489/989] gh-135155: Added dependencies required for compiling the _zstd module (GH-135156) --- .github/workflows/posix-deps-apt.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index d5538cd9367ec6..7773222af5d26f 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -17,6 +17,7 @@ apt-get -yq install \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ + libzstd-dev \ lzma \ lzma-dev \ strace \ From 0d9ccc87a2198a0c1881ab4b17a24fc7fec62418 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 6 Jun 2025 03:51:06 +0300 Subject: [PATCH 490/989] gh-134036: Improve error messages for invalid `raise` statements (#134077) --- Grammar/python.gram | 6 + Lib/test/test_syntax.py | 22 + ...-05-16-09-06-38.gh-issue-134036.st2e-B.rst | 2 + Parser/parser.c | 901 ++++++++++-------- 4 files changed, 529 insertions(+), 402 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-09-06-38.gh-issue-134036.st2e-B.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index de435537095031..a5ab769910b5ac 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -184,6 +184,7 @@ return_stmt[stmt_ty]: | 'return' a=[star_expressions] { _PyAST_Return(a, EXTRA) } raise_stmt[stmt_ty]: + | invalid_raise_stmt | 'raise' a=expression b=['from' z=expression { z }] { _PyAST_Raise(a, b, EXTRA) } | 'raise' { _PyAST_Raise(NULL, NULL, EXTRA) } @@ -1287,6 +1288,11 @@ invalid_ann_assign_target[expr_ty]: | list | tuple | '(' a=invalid_ann_assign_target ')' { a } +invalid_raise_stmt: + | a='raise' b='from' { + RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget an expression between 'raise' and 'from'?") } + | 'raise' expression a='from' { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "did you forget an expression after 'from'?") } invalid_del_stmt: | 'del' a=star_expressions { RAISE_SYNTAX_ERROR_INVALID_TARGET(DEL_TARGETS, a) } diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index c7ac791415830f..965c096475f590 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1695,6 +1695,28 @@ ... SyntaxError: invalid syntax +Better errors for `raise` statement: + + >>> raise ValueError from + Traceback (most recent call last): + SyntaxError: did you forget an expression after 'from'? + + >>> raise mod.ValueError() from + Traceback (most recent call last): + SyntaxError: did you forget an expression after 'from'? + + >>> raise from exc + Traceback (most recent call last): + SyntaxError: did you forget an expression between 'raise' and 'from'? + + >>> raise from None + Traceback (most recent call last): + SyntaxError: did you forget an expression between 'raise' and 'from'? + + >>> raise from + Traceback (most recent call last): + SyntaxError: did you forget an expression between 'raise' and 'from'? + Check that an multiple exception types with missing parentheses raise a custom exception only when using 'as' diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-09-06-38.gh-issue-134036.st2e-B.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-09-06-38.gh-issue-134036.st2e-B.rst new file mode 100644 index 00000000000000..176aab1c93ab83 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-16-09-06-38.gh-issue-134036.st2e-B.rst @@ -0,0 +1,2 @@ +Improve :exc:`SyntaxError` message when using invalid :keyword:`raise` +statements. diff --git a/Parser/parser.c b/Parser/parser.c index d5aafef826ed3a..82311b4f40eebf 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -21,54 +21,54 @@ static KeywordToken *reserved_keywords[] = { (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) { - {"if", 682}, - {"as", 680}, - {"in", 695}, + {"if", 686}, + {"as", 684}, + {"in", 699}, {"or", 588}, {"is", 596}, {NULL, -1}, }, (KeywordToken[]) { - {"del", 625}, - {"def", 699}, - {"for", 694}, - {"try", 656}, + {"del", 629}, + {"def", 703}, + {"for", 698}, + {"try", 660}, {"and", 589}, - {"not", 703}, + {"not", 707}, {NULL, -1}, }, (KeywordToken[]) { - {"from", 633}, + {"from", 637}, {"pass", 526}, - {"with", 647}, - {"elif", 687}, - {"else", 686}, + {"with", 651}, + {"elif", 691}, + {"else", 690}, {"None", 623}, {"True", 622}, {NULL, -1}, }, (KeywordToken[]) { - {"raise", 525}, + {"raise", 627}, {"yield", 587}, {"break", 527}, - {"async", 698}, - {"class", 701}, - {"while", 689}, + {"async", 702}, + {"class", 705}, + {"while", 693}, {"False", 624}, {"await", 597}, {NULL, -1}, }, (KeywordToken[]) { {"return", 522}, - {"import", 634}, + {"import", 638}, {"assert", 532}, {"global", 529}, - {"except", 677}, + {"except", 681}, {"lambda", 621}, {NULL, -1}, }, (KeywordToken[]) { - {"finally", 673}, + {"finally", 677}, {NULL, -1}, }, (KeywordToken[]) { @@ -298,235 +298,236 @@ static char *soft_keywords[] = { #define invalid_named_expression_type 1211 #define invalid_assignment_type 1212 #define invalid_ann_assign_target_type 1213 -#define invalid_del_stmt_type 1214 -#define invalid_block_type 1215 -#define invalid_comprehension_type 1216 -#define invalid_dict_comprehension_type 1217 -#define invalid_parameters_type 1218 -#define invalid_default_type 1219 -#define invalid_star_etc_type 1220 -#define invalid_kwds_type 1221 -#define invalid_parameters_helper_type 1222 -#define invalid_lambda_parameters_type 1223 -#define invalid_lambda_parameters_helper_type 1224 -#define invalid_lambda_star_etc_type 1225 -#define invalid_lambda_kwds_type 1226 -#define invalid_double_type_comments_type 1227 -#define invalid_with_item_type 1228 -#define invalid_for_if_clause_type 1229 -#define invalid_for_target_type 1230 -#define invalid_group_type 1231 -#define invalid_import_type 1232 -#define invalid_dotted_as_name_type 1233 -#define invalid_import_from_as_name_type 1234 -#define invalid_import_from_targets_type 1235 -#define invalid_with_stmt_type 1236 -#define invalid_with_stmt_indent_type 1237 -#define invalid_try_stmt_type 1238 -#define invalid_except_stmt_type 1239 -#define invalid_except_star_stmt_type 1240 -#define invalid_finally_stmt_type 1241 -#define invalid_except_stmt_indent_type 1242 -#define invalid_except_star_stmt_indent_type 1243 -#define invalid_match_stmt_type 1244 -#define invalid_case_block_type 1245 -#define invalid_as_pattern_type 1246 -#define invalid_class_pattern_type 1247 -#define invalid_class_argument_pattern_type 1248 -#define invalid_if_stmt_type 1249 -#define invalid_elif_stmt_type 1250 -#define invalid_else_stmt_type 1251 -#define invalid_while_stmt_type 1252 -#define invalid_for_stmt_type 1253 -#define invalid_def_raw_type 1254 -#define invalid_class_def_raw_type 1255 -#define invalid_double_starred_kvpairs_type 1256 -#define invalid_kvpair_type 1257 -#define invalid_starred_expression_unpacking_type 1258 -#define invalid_starred_expression_type 1259 -#define invalid_fstring_replacement_field_type 1260 -#define invalid_fstring_conversion_character_type 1261 -#define invalid_tstring_replacement_field_type 1262 -#define invalid_tstring_conversion_character_type 1263 -#define invalid_arithmetic_type 1264 -#define invalid_factor_type 1265 -#define invalid_type_params_type 1266 -#define _loop0_1_type 1267 -#define _loop1_2_type 1268 -#define _loop0_3_type 1269 -#define _gather_4_type 1270 -#define _tmp_5_type 1271 -#define _tmp_6_type 1272 -#define _tmp_7_type 1273 -#define _tmp_8_type 1274 -#define _tmp_9_type 1275 -#define _tmp_10_type 1276 -#define _tmp_11_type 1277 -#define _loop1_12_type 1278 -#define _tmp_13_type 1279 -#define _loop0_14_type 1280 -#define _gather_15_type 1281 -#define _tmp_16_type 1282 -#define _tmp_17_type 1283 -#define _loop0_18_type 1284 -#define _loop1_19_type 1285 -#define _loop0_20_type 1286 -#define _gather_21_type 1287 -#define _tmp_22_type 1288 -#define _loop0_23_type 1289 -#define _gather_24_type 1290 -#define _loop1_25_type 1291 -#define _tmp_26_type 1292 -#define _tmp_27_type 1293 -#define _loop0_28_type 1294 -#define _loop0_29_type 1295 -#define _loop1_30_type 1296 -#define _loop1_31_type 1297 -#define _loop0_32_type 1298 -#define _loop1_33_type 1299 -#define _loop0_34_type 1300 -#define _gather_35_type 1301 -#define _tmp_36_type 1302 -#define _loop1_37_type 1303 -#define _loop1_38_type 1304 -#define _loop1_39_type 1305 -#define _loop0_40_type 1306 -#define _gather_41_type 1307 -#define _tmp_42_type 1308 -#define _tmp_43_type 1309 -#define _tmp_44_type 1310 -#define _loop0_45_type 1311 -#define _gather_46_type 1312 -#define _loop0_47_type 1313 -#define _gather_48_type 1314 -#define _tmp_49_type 1315 -#define _loop0_50_type 1316 -#define _gather_51_type 1317 -#define _loop0_52_type 1318 -#define _gather_53_type 1319 -#define _loop0_54_type 1320 -#define _gather_55_type 1321 -#define _loop1_56_type 1322 -#define _loop1_57_type 1323 -#define _loop0_58_type 1324 -#define _gather_59_type 1325 -#define _loop1_60_type 1326 -#define _loop1_61_type 1327 -#define _loop1_62_type 1328 -#define _tmp_63_type 1329 -#define _loop0_64_type 1330 -#define _gather_65_type 1331 -#define _tmp_66_type 1332 -#define _tmp_67_type 1333 -#define _tmp_68_type 1334 -#define _tmp_69_type 1335 -#define _tmp_70_type 1336 -#define _loop0_71_type 1337 -#define _loop0_72_type 1338 -#define _loop1_73_type 1339 -#define _loop1_74_type 1340 -#define _loop0_75_type 1341 -#define _loop1_76_type 1342 -#define _loop0_77_type 1343 -#define _loop0_78_type 1344 -#define _loop0_79_type 1345 -#define _loop0_80_type 1346 -#define _loop1_81_type 1347 -#define _tmp_82_type 1348 -#define _loop0_83_type 1349 -#define _gather_84_type 1350 -#define _loop1_85_type 1351 -#define _loop0_86_type 1352 -#define _tmp_87_type 1353 -#define _loop0_88_type 1354 -#define _gather_89_type 1355 -#define _tmp_90_type 1356 -#define _loop0_91_type 1357 -#define _gather_92_type 1358 -#define _loop0_93_type 1359 -#define _gather_94_type 1360 -#define _loop0_95_type 1361 -#define _loop0_96_type 1362 -#define _gather_97_type 1363 -#define _loop1_98_type 1364 -#define _tmp_99_type 1365 -#define _loop0_100_type 1366 -#define _gather_101_type 1367 -#define _loop0_102_type 1368 -#define _gather_103_type 1369 -#define _tmp_104_type 1370 -#define _tmp_105_type 1371 -#define _loop0_106_type 1372 -#define _gather_107_type 1373 -#define _tmp_108_type 1374 -#define _tmp_109_type 1375 -#define _tmp_110_type 1376 -#define _tmp_111_type 1377 -#define _tmp_112_type 1378 -#define _loop1_113_type 1379 -#define _tmp_114_type 1380 -#define _tmp_115_type 1381 -#define _tmp_116_type 1382 -#define _tmp_117_type 1383 -#define _tmp_118_type 1384 -#define _loop0_119_type 1385 -#define _loop0_120_type 1386 -#define _tmp_121_type 1387 -#define _tmp_122_type 1388 -#define _tmp_123_type 1389 -#define _tmp_124_type 1390 -#define _tmp_125_type 1391 -#define _tmp_126_type 1392 -#define _tmp_127_type 1393 -#define _tmp_128_type 1394 -#define _tmp_129_type 1395 -#define _loop0_130_type 1396 -#define _gather_131_type 1397 -#define _tmp_132_type 1398 -#define _tmp_133_type 1399 -#define _tmp_134_type 1400 -#define _tmp_135_type 1401 -#define _loop0_136_type 1402 -#define _gather_137_type 1403 -#define _tmp_138_type 1404 -#define _loop0_139_type 1405 -#define _gather_140_type 1406 -#define _loop0_141_type 1407 -#define _gather_142_type 1408 -#define _tmp_143_type 1409 -#define _loop0_144_type 1410 -#define _tmp_145_type 1411 -#define _tmp_146_type 1412 -#define _tmp_147_type 1413 -#define _tmp_148_type 1414 -#define _tmp_149_type 1415 -#define _tmp_150_type 1416 -#define _tmp_151_type 1417 -#define _tmp_152_type 1418 -#define _tmp_153_type 1419 -#define _tmp_154_type 1420 -#define _tmp_155_type 1421 -#define _tmp_156_type 1422 -#define _tmp_157_type 1423 -#define _tmp_158_type 1424 -#define _tmp_159_type 1425 -#define _tmp_160_type 1426 -#define _tmp_161_type 1427 -#define _tmp_162_type 1428 -#define _tmp_163_type 1429 -#define _tmp_164_type 1430 -#define _tmp_165_type 1431 -#define _tmp_166_type 1432 -#define _tmp_167_type 1433 -#define _tmp_168_type 1434 -#define _tmp_169_type 1435 -#define _tmp_170_type 1436 -#define _loop0_171_type 1437 -#define _tmp_172_type 1438 -#define _tmp_173_type 1439 -#define _tmp_174_type 1440 -#define _tmp_175_type 1441 -#define _tmp_176_type 1442 +#define invalid_raise_stmt_type 1214 +#define invalid_del_stmt_type 1215 +#define invalid_block_type 1216 +#define invalid_comprehension_type 1217 +#define invalid_dict_comprehension_type 1218 +#define invalid_parameters_type 1219 +#define invalid_default_type 1220 +#define invalid_star_etc_type 1221 +#define invalid_kwds_type 1222 +#define invalid_parameters_helper_type 1223 +#define invalid_lambda_parameters_type 1224 +#define invalid_lambda_parameters_helper_type 1225 +#define invalid_lambda_star_etc_type 1226 +#define invalid_lambda_kwds_type 1227 +#define invalid_double_type_comments_type 1228 +#define invalid_with_item_type 1229 +#define invalid_for_if_clause_type 1230 +#define invalid_for_target_type 1231 +#define invalid_group_type 1232 +#define invalid_import_type 1233 +#define invalid_dotted_as_name_type 1234 +#define invalid_import_from_as_name_type 1235 +#define invalid_import_from_targets_type 1236 +#define invalid_with_stmt_type 1237 +#define invalid_with_stmt_indent_type 1238 +#define invalid_try_stmt_type 1239 +#define invalid_except_stmt_type 1240 +#define invalid_except_star_stmt_type 1241 +#define invalid_finally_stmt_type 1242 +#define invalid_except_stmt_indent_type 1243 +#define invalid_except_star_stmt_indent_type 1244 +#define invalid_match_stmt_type 1245 +#define invalid_case_block_type 1246 +#define invalid_as_pattern_type 1247 +#define invalid_class_pattern_type 1248 +#define invalid_class_argument_pattern_type 1249 +#define invalid_if_stmt_type 1250 +#define invalid_elif_stmt_type 1251 +#define invalid_else_stmt_type 1252 +#define invalid_while_stmt_type 1253 +#define invalid_for_stmt_type 1254 +#define invalid_def_raw_type 1255 +#define invalid_class_def_raw_type 1256 +#define invalid_double_starred_kvpairs_type 1257 +#define invalid_kvpair_type 1258 +#define invalid_starred_expression_unpacking_type 1259 +#define invalid_starred_expression_type 1260 +#define invalid_fstring_replacement_field_type 1261 +#define invalid_fstring_conversion_character_type 1262 +#define invalid_tstring_replacement_field_type 1263 +#define invalid_tstring_conversion_character_type 1264 +#define invalid_arithmetic_type 1265 +#define invalid_factor_type 1266 +#define invalid_type_params_type 1267 +#define _loop0_1_type 1268 +#define _loop1_2_type 1269 +#define _loop0_3_type 1270 +#define _gather_4_type 1271 +#define _tmp_5_type 1272 +#define _tmp_6_type 1273 +#define _tmp_7_type 1274 +#define _tmp_8_type 1275 +#define _tmp_9_type 1276 +#define _tmp_10_type 1277 +#define _tmp_11_type 1278 +#define _loop1_12_type 1279 +#define _tmp_13_type 1280 +#define _loop0_14_type 1281 +#define _gather_15_type 1282 +#define _tmp_16_type 1283 +#define _tmp_17_type 1284 +#define _loop0_18_type 1285 +#define _loop1_19_type 1286 +#define _loop0_20_type 1287 +#define _gather_21_type 1288 +#define _tmp_22_type 1289 +#define _loop0_23_type 1290 +#define _gather_24_type 1291 +#define _loop1_25_type 1292 +#define _tmp_26_type 1293 +#define _tmp_27_type 1294 +#define _loop0_28_type 1295 +#define _loop0_29_type 1296 +#define _loop1_30_type 1297 +#define _loop1_31_type 1298 +#define _loop0_32_type 1299 +#define _loop1_33_type 1300 +#define _loop0_34_type 1301 +#define _gather_35_type 1302 +#define _tmp_36_type 1303 +#define _loop1_37_type 1304 +#define _loop1_38_type 1305 +#define _loop1_39_type 1306 +#define _loop0_40_type 1307 +#define _gather_41_type 1308 +#define _tmp_42_type 1309 +#define _tmp_43_type 1310 +#define _tmp_44_type 1311 +#define _loop0_45_type 1312 +#define _gather_46_type 1313 +#define _loop0_47_type 1314 +#define _gather_48_type 1315 +#define _tmp_49_type 1316 +#define _loop0_50_type 1317 +#define _gather_51_type 1318 +#define _loop0_52_type 1319 +#define _gather_53_type 1320 +#define _loop0_54_type 1321 +#define _gather_55_type 1322 +#define _loop1_56_type 1323 +#define _loop1_57_type 1324 +#define _loop0_58_type 1325 +#define _gather_59_type 1326 +#define _loop1_60_type 1327 +#define _loop1_61_type 1328 +#define _loop1_62_type 1329 +#define _tmp_63_type 1330 +#define _loop0_64_type 1331 +#define _gather_65_type 1332 +#define _tmp_66_type 1333 +#define _tmp_67_type 1334 +#define _tmp_68_type 1335 +#define _tmp_69_type 1336 +#define _tmp_70_type 1337 +#define _loop0_71_type 1338 +#define _loop0_72_type 1339 +#define _loop1_73_type 1340 +#define _loop1_74_type 1341 +#define _loop0_75_type 1342 +#define _loop1_76_type 1343 +#define _loop0_77_type 1344 +#define _loop0_78_type 1345 +#define _loop0_79_type 1346 +#define _loop0_80_type 1347 +#define _loop1_81_type 1348 +#define _tmp_82_type 1349 +#define _loop0_83_type 1350 +#define _gather_84_type 1351 +#define _loop1_85_type 1352 +#define _loop0_86_type 1353 +#define _tmp_87_type 1354 +#define _loop0_88_type 1355 +#define _gather_89_type 1356 +#define _tmp_90_type 1357 +#define _loop0_91_type 1358 +#define _gather_92_type 1359 +#define _loop0_93_type 1360 +#define _gather_94_type 1361 +#define _loop0_95_type 1362 +#define _loop0_96_type 1363 +#define _gather_97_type 1364 +#define _loop1_98_type 1365 +#define _tmp_99_type 1366 +#define _loop0_100_type 1367 +#define _gather_101_type 1368 +#define _loop0_102_type 1369 +#define _gather_103_type 1370 +#define _tmp_104_type 1371 +#define _tmp_105_type 1372 +#define _loop0_106_type 1373 +#define _gather_107_type 1374 +#define _tmp_108_type 1375 +#define _tmp_109_type 1376 +#define _tmp_110_type 1377 +#define _tmp_111_type 1378 +#define _tmp_112_type 1379 +#define _loop1_113_type 1380 +#define _tmp_114_type 1381 +#define _tmp_115_type 1382 +#define _tmp_116_type 1383 +#define _tmp_117_type 1384 +#define _tmp_118_type 1385 +#define _loop0_119_type 1386 +#define _loop0_120_type 1387 +#define _tmp_121_type 1388 +#define _tmp_122_type 1389 +#define _tmp_123_type 1390 +#define _tmp_124_type 1391 +#define _tmp_125_type 1392 +#define _tmp_126_type 1393 +#define _tmp_127_type 1394 +#define _tmp_128_type 1395 +#define _tmp_129_type 1396 +#define _loop0_130_type 1397 +#define _gather_131_type 1398 +#define _tmp_132_type 1399 +#define _tmp_133_type 1400 +#define _tmp_134_type 1401 +#define _tmp_135_type 1402 +#define _loop0_136_type 1403 +#define _gather_137_type 1404 +#define _tmp_138_type 1405 +#define _loop0_139_type 1406 +#define _gather_140_type 1407 +#define _loop0_141_type 1408 +#define _gather_142_type 1409 +#define _tmp_143_type 1410 +#define _loop0_144_type 1411 +#define _tmp_145_type 1412 +#define _tmp_146_type 1413 +#define _tmp_147_type 1414 +#define _tmp_148_type 1415 +#define _tmp_149_type 1416 +#define _tmp_150_type 1417 +#define _tmp_151_type 1418 +#define _tmp_152_type 1419 +#define _tmp_153_type 1420 +#define _tmp_154_type 1421 +#define _tmp_155_type 1422 +#define _tmp_156_type 1423 +#define _tmp_157_type 1424 +#define _tmp_158_type 1425 +#define _tmp_159_type 1426 +#define _tmp_160_type 1427 +#define _tmp_161_type 1428 +#define _tmp_162_type 1429 +#define _tmp_163_type 1430 +#define _tmp_164_type 1431 +#define _tmp_165_type 1432 +#define _tmp_166_type 1433 +#define _tmp_167_type 1434 +#define _tmp_168_type 1435 +#define _tmp_169_type 1436 +#define _tmp_170_type 1437 +#define _loop0_171_type 1438 +#define _tmp_172_type 1439 +#define _tmp_173_type 1440 +#define _tmp_174_type 1441 +#define _tmp_175_type 1442 +#define _tmp_176_type 1443 static mod_ty file_rule(Parser *p); static mod_ty interactive_rule(Parser *p); @@ -742,6 +743,7 @@ static void *invalid_expression_rule(Parser *p); static void *invalid_named_expression_rule(Parser *p); static void *invalid_assignment_rule(Parser *p); static expr_ty invalid_ann_assign_target_rule(Parser *p); +static void *invalid_raise_stmt_rule(Parser *p); static void *invalid_del_stmt_rule(Parser *p); static void *invalid_block_rule(Parser *p); static void *invalid_comprehension_rule(Parser *p); @@ -1698,7 +1700,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'raise' raise_stmt")); stmt_ty raise_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 525) // token='raise' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 627) // token='raise' && (raise_stmt_var = raise_stmt_rule(p)) // raise_stmt ) @@ -1740,7 +1742,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'del' del_stmt")); stmt_ty del_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 625) // token='del' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 629) // token='del' && (del_stmt_var = del_stmt_rule(p)) // del_stmt ) @@ -1936,7 +1938,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'if' if_stmt")); stmt_ty if_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 682) // token='if' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 686) // token='if' && (if_stmt_var = if_stmt_rule(p)) // if_stmt ) @@ -2020,7 +2022,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'try' try_stmt")); stmt_ty try_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 656) // token='try' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 660) // token='try' && (try_stmt_var = try_stmt_rule(p)) // try_stmt ) @@ -2041,7 +2043,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'while' while_stmt")); stmt_ty while_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 689) // token='while' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 693) // token='while' && (while_stmt_var = while_stmt_rule(p)) // while_stmt ) @@ -2767,7 +2769,7 @@ return_stmt_rule(Parser *p) return _res; } -// raise_stmt: 'raise' expression ['from' expression] | 'raise' +// raise_stmt: invalid_raise_stmt | 'raise' expression ['from' expression] | 'raise' static stmt_ty raise_stmt_rule(Parser *p) { @@ -2789,6 +2791,25 @@ raise_stmt_rule(Parser *p) UNUSED(_start_lineno); // Only used by EXTRA macro int _start_col_offset = p->tokens[_mark]->col_offset; UNUSED(_start_col_offset); // Only used by EXTRA macro + if (p->call_invalid_rules) { // invalid_raise_stmt + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_raise_stmt")); + void *invalid_raise_stmt_var; + if ( + (invalid_raise_stmt_var = invalid_raise_stmt_rule(p)) // invalid_raise_stmt + ) + { + D(fprintf(stderr, "%*c+ raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_raise_stmt")); + _res = invalid_raise_stmt_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s raise_stmt[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_raise_stmt")); + } { // 'raise' expression ['from' expression] if (p->error_indicator) { p->level--; @@ -2799,7 +2820,7 @@ raise_stmt_rule(Parser *p) expr_ty a; void *b; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 627)) // token='raise' && (a = expression_rule(p)) // expression && @@ -2836,7 +2857,7 @@ raise_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'raise'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 627)) // token='raise' ) { D(fprintf(stderr, "%*c+ raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'raise'")); @@ -3209,7 +3230,7 @@ del_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 625)) // token='del' + (_keyword = _PyPegen_expect_token(p, 629)) // token='del' && (a = del_targets_rule(p)) // del_targets && @@ -3498,7 +3519,7 @@ import_name_rule(Parser *p) Token * _keyword; asdl_alias_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 634)) // token='import' + (_keyword = _PyPegen_expect_token(p, 638)) // token='import' && (a = dotted_as_names_rule(p)) // dotted_as_names ) @@ -3567,13 +3588,13 @@ import_from_rule(Parser *p) expr_ty b; asdl_alias_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword = _PyPegen_expect_token(p, 637)) // token='from' && (a = _loop0_18_rule(p)) // (('.' | '...'))* && (b = dotted_name_rule(p)) // dotted_name && - (_keyword_1 = _PyPegen_expect_token(p, 634)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 638)) // token='import' && (c = import_from_targets_rule(p)) // import_from_targets ) @@ -3611,11 +3632,11 @@ import_from_rule(Parser *p) asdl_seq * a; asdl_alias_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword = _PyPegen_expect_token(p, 637)) // token='from' && (a = _loop1_19_rule(p)) // (('.' | '...'))+ && - (_keyword_1 = _PyPegen_expect_token(p, 634)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 638)) // token='import' && (b = import_from_targets_rule(p)) // import_from_targets ) @@ -4402,7 +4423,7 @@ class_def_raw_rule(Parser *p) asdl_stmt_seq* c; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 701)) // token='class' + (_keyword = _PyPegen_expect_token(p, 705)) // token='class' && (a = _PyPegen_name_token(p)) // NAME && @@ -4569,7 +4590,7 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 699)) // token='def' + (_keyword = _PyPegen_expect_token(p, 703)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -4630,9 +4651,9 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 699)) // token='def' + (_keyword_1 = _PyPegen_expect_token(p, 703)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -5970,7 +5991,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -6015,7 +6036,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -6110,7 +6131,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 687)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 691)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6155,7 +6176,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 687)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 691)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6236,7 +6257,7 @@ else_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword = _PyPegen_expect_token(p, 690)) // token='else' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6315,7 +6336,7 @@ while_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 689)) // token='while' + (_keyword = _PyPegen_expect_token(p, 693)) // token='while' && (a = named_expression_rule(p)) // named_expression && @@ -6415,11 +6436,11 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 699)) // token='in' && (_cut_var = 1) && @@ -6477,13 +6498,13 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 698)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 699)) // token='in' && (_cut_var = 1) && @@ -6612,7 +6633,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword = _PyPegen_expect_token(p, 651)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6663,7 +6684,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword = _PyPegen_expect_token(p, 651)) // token='with' && (a = (asdl_withitem_seq*)_gather_35_rule(p)) // ','.with_item+ && @@ -6712,9 +6733,9 @@ with_stmt_rule(Parser *p) asdl_withitem_seq* a; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 651)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6764,9 +6785,9 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 651)) // token='with' && (a = (asdl_withitem_seq*)_gather_35_rule(p)) // ','.with_item+ && @@ -6852,7 +6873,7 @@ with_item_rule(Parser *p) if ( (e = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (t = star_target_rule(p)) // star_target && @@ -6977,7 +6998,7 @@ try_stmt_rule(Parser *p) asdl_stmt_seq* b; asdl_stmt_seq* f; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7021,7 +7042,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7069,7 +7090,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7168,7 +7189,7 @@ except_block_rule(Parser *p) asdl_stmt_seq* b; expr_ty e; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (e = expression_rule(p)) // expression && @@ -7212,11 +7233,11 @@ except_block_rule(Parser *p) expr_ty e; expr_ty t; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (e = expression_rule(p)) // expression && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (t = _PyPegen_name_token(p)) // NAME && @@ -7258,7 +7279,7 @@ except_block_rule(Parser *p) asdl_stmt_seq* b; expr_ty e; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (e = expressions_rule(p)) // expressions && @@ -7299,7 +7320,7 @@ except_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -7411,7 +7432,7 @@ except_star_block_rule(Parser *p) asdl_stmt_seq* b; expr_ty e; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -7458,13 +7479,13 @@ except_star_block_rule(Parser *p) expr_ty e; expr_ty t; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && (e = expression_rule(p)) // expression && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (t = _PyPegen_name_token(p)) // NAME && @@ -7507,7 +7528,7 @@ except_star_block_rule(Parser *p) asdl_stmt_seq* b; expr_ty e; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -7607,7 +7628,7 @@ finally_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 673)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 677)) // token='finally' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7915,7 +7936,7 @@ guard_rule(Parser *p) Token * _keyword; expr_ty guard; if ( - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (guard = named_expression_rule(p)) // named_expression ) @@ -8110,7 +8131,7 @@ as_pattern_rule(Parser *p) if ( (pattern = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (target = pattern_capture_target_rule(p)) // pattern_capture_target ) @@ -11407,11 +11428,11 @@ expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 690)) // token='else' && (c = expression_rule(p)) // expression ) @@ -11517,7 +11538,7 @@ yield_expr_rule(Parser *p) if ( (_keyword = _PyPegen_expect_token(p, 587)) // token='yield' && - (_keyword_1 = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword_1 = _PyPegen_expect_token(p, 637)) // token='from' && (a = expression_rule(p)) // expression ) @@ -12293,7 +12314,7 @@ inversion_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 703)) // token='not' + (_keyword = _PyPegen_expect_token(p, 707)) // token='not' && (a = inversion_rule(p)) // inversion ) @@ -12947,9 +12968,9 @@ notin_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 703)) // token='not' + (_keyword = _PyPegen_expect_token(p, 707)) // token='not' && - (_keyword_1 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 699)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -12995,7 +13016,7 @@ in_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword = _PyPegen_expect_token(p, 699)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -13044,7 +13065,7 @@ isnot_bitwise_or_rule(Parser *p) if ( (_keyword = _PyPegen_expect_token(p, 596)) // token='is' && - (_keyword_1 = _PyPegen_expect_token(p, 703)) // token='not' + (_keyword_1 = _PyPegen_expect_token(p, 707)) // token='not' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -17651,13 +17672,13 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 698)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 699)) // token='in' && (_cut_var = 1) && @@ -17696,11 +17717,11 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 699)) // token='in' && (_cut_var = 1) && @@ -21001,11 +21022,11 @@ expression_without_invalid_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 690)) // token='else' && (c = expression_rule(p)) // expression ) @@ -21305,7 +21326,7 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (b = disjunction_rule(p)) // disjunction && @@ -21338,11 +21359,11 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 690)) // token='else' && _PyPegen_lookahead(0, (void *(*)(Parser *)) expression_rule, p) ) @@ -21374,11 +21395,11 @@ invalid_expression_rule(Parser *p) if ( (a = (stmt_ty)_tmp_116_rule(p)) // pass_stmt | break_stmt | continue_stmt && - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 690)) // token='else' && (c = simple_stmt_rule(p)) // simple_stmt ) @@ -21889,6 +21910,82 @@ invalid_ann_assign_target_rule(Parser *p) return _res; } +// invalid_raise_stmt: 'raise' 'from' | 'raise' expression 'from' +static void * +invalid_raise_stmt_rule(Parser *p) +{ + if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { + _Pypegen_stack_overflow(p); + } + if (p->error_indicator) { + p->level--; + return NULL; + } + void * _res = NULL; + int _mark = p->mark; + { // 'raise' 'from' + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> invalid_raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'raise' 'from'")); + Token * a; + Token * b; + if ( + (a = _PyPegen_expect_token(p, 627)) // token='raise' + && + (b = _PyPegen_expect_token(p, 637)) // token='from' + ) + { + D(fprintf(stderr, "%*c+ invalid_raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'raise' 'from'")); + _res = RAISE_SYNTAX_ERROR_KNOWN_RANGE ( a , b , "did you forget an expression between 'raise' and 'from'?" ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s invalid_raise_stmt[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'raise' 'from'")); + } + { // 'raise' expression 'from' + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> invalid_raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'raise' expression 'from'")); + Token * _keyword; + Token * a; + expr_ty expression_var; + if ( + (_keyword = _PyPegen_expect_token(p, 627)) // token='raise' + && + (expression_var = expression_rule(p)) // expression + && + (a = _PyPegen_expect_token(p, 637)) // token='from' + ) + { + D(fprintf(stderr, "%*c+ invalid_raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'raise' expression 'from'")); + _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "did you forget an expression after 'from'?" ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s invalid_raise_stmt[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'raise' expression 'from'")); + } + _res = NULL; + done: + p->level--; + return _res; +} + // invalid_del_stmt: 'del' star_expressions static void * invalid_del_stmt_rule(Parser *p) @@ -21911,7 +22008,7 @@ invalid_del_stmt_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 625)) // token='del' + (_keyword = _PyPegen_expect_token(p, 629)) // token='del' && (a = star_expressions_rule(p)) // star_expressions ) @@ -23347,7 +23444,7 @@ invalid_with_item_rule(Parser *p) if ( (expression_var = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (a = expression_rule(p)) // expression && @@ -23397,13 +23494,13 @@ invalid_for_if_clause_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings void *_tmp_135_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' && (_tmp_135_var = _tmp_135_rule(p)) // bitwise_or ((',' bitwise_or))* ','? && - _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 695) // token='in' + _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 699) // token='in' ) { D(fprintf(stderr, "%*c+ invalid_for_if_clause[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'? 'for' (bitwise_or ((',' bitwise_or))* ','?) !'in'")); @@ -23449,9 +23546,9 @@ invalid_for_target_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings expr_ty a; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' && (a = star_expressions_rule(p)) // star_expressions ) @@ -23581,11 +23678,11 @@ invalid_import_rule(Parser *p) Token * a; expr_ty dotted_name_var; if ( - (a = _PyPegen_expect_token(p, 634)) // token='import' + (a = _PyPegen_expect_token(p, 638)) // token='import' && (_gather_137_var = _gather_137_rule(p)) // ','.dotted_name+ && - (_keyword = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword = _PyPegen_expect_token(p, 637)) // token='from' && (dotted_name_var = dotted_name_rule(p)) // dotted_name ) @@ -23612,7 +23709,7 @@ invalid_import_rule(Parser *p) Token * _keyword; Token * token; if ( - (_keyword = _PyPegen_expect_token(p, 634)) // token='import' + (_keyword = _PyPegen_expect_token(p, 638)) // token='import' && (token = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -23661,7 +23758,7 @@ invalid_dotted_as_name_rule(Parser *p) if ( (dotted_name_var = dotted_name_rule(p)) // dotted_name && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) && @@ -23712,7 +23809,7 @@ invalid_import_from_as_name_rule(Parser *p) if ( (name_var = _PyPegen_name_token(p)) // NAME && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) && @@ -23838,9 +23935,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword = _PyPegen_expect_token(p, 651)) // token='with' && (_gather_140_var = _gather_140_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23876,9 +23973,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var_1); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword = _PyPegen_expect_token(p, 651)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -23938,9 +24035,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 647)) // token='with' + (a = _PyPegen_expect_token(p, 651)) // token='with' && (_gather_140_var = _gather_140_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23981,9 +24078,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 647)) // token='with' + (a = _PyPegen_expect_token(p, 651)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -24046,7 +24143,7 @@ invalid_try_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 656)) // token='try' + (a = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24078,7 +24175,7 @@ invalid_try_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24117,7 +24214,7 @@ invalid_try_stmt_rule(Parser *p) Token * b; expr_ty expression_var; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24125,7 +24222,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_37_var = _loop1_37_rule(p)) // except_block+ && - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (b = _PyPegen_expect_token(p, 16)) // token='*' && @@ -24164,7 +24261,7 @@ invalid_try_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * a; if ( - (_keyword = _PyPegen_expect_token(p, 656)) // token='try' + (_keyword = _PyPegen_expect_token(p, 660)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24172,7 +24269,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_38_var = _loop1_38_rule(p)) // except_star_block+ && - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (_opt_var = _tmp_145_rule(p), !p->error_indicator) // [expression ['as' NAME]] && @@ -24229,7 +24326,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expressions_var; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (a = expression_rule(p)) // expression && @@ -24237,7 +24334,7 @@ invalid_except_stmt_rule(Parser *p) && (expressions_var = expressions_rule(p)) // expressions && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24269,7 +24366,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -24300,7 +24397,7 @@ invalid_except_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -24331,11 +24428,11 @@ invalid_except_stmt_rule(Parser *p) asdl_stmt_seq* block_var; expr_ty expression_var; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (expression_var = expression_rule(p)) // expression && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (a = expression_rule(p)) // expression && @@ -24395,7 +24492,7 @@ invalid_except_star_stmt_rule(Parser *p) expr_ty expressions_var; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -24405,7 +24502,7 @@ invalid_except_star_stmt_rule(Parser *p) && (expressions_var = expressions_rule(p)) // expressions && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24438,7 +24535,7 @@ invalid_except_star_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -24472,7 +24569,7 @@ invalid_except_star_stmt_rule(Parser *p) void *_tmp_146_var; Token * a; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -24506,13 +24603,13 @@ invalid_except_star_stmt_rule(Parser *p) asdl_stmt_seq* block_var; expr_ty expression_var; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && (expression_var = expression_rule(p)) // expression && - (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword_1 = _PyPegen_expect_token(p, 684)) // token='as' && (a = expression_rule(p)) // expression && @@ -24563,7 +24660,7 @@ invalid_finally_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 673)) // token='finally' + (a = _PyPegen_expect_token(p, 677)) // token='finally' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24619,7 +24716,7 @@ invalid_except_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -24655,7 +24752,7 @@ invalid_except_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24711,7 +24808,7 @@ invalid_except_star_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 677)) // token='except' + (a = _PyPegen_expect_token(p, 681)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -24950,7 +25047,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (a = _PyPegen_expect_soft_keyword(p, "_")) // soft_keyword='"_"' ) @@ -24980,7 +25077,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (a = expression_rule(p)) // expression ) @@ -25132,7 +25229,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -25163,7 +25260,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty a_1; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 682)) // token='if' + (a = _PyPegen_expect_token(p, 686)) // token='if' && (a_1 = named_expression_rule(p)) // named_expression && @@ -25218,7 +25315,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 687)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 691)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -25249,7 +25346,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 687)) // token='elif' + (a = _PyPegen_expect_token(p, 691)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -25302,7 +25399,7 @@ invalid_else_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 686)) // token='else' + (a = _PyPegen_expect_token(p, 690)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -25335,13 +25432,13 @@ invalid_else_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword = _PyPegen_expect_token(p, 690)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && (block_var = block_rule(p)) // block && - (_keyword_1 = _PyPegen_expect_token(p, 687)) // token='elif' + (_keyword_1 = _PyPegen_expect_token(p, 691)) // token='elif' ) { D(fprintf(stderr, "%*c+ invalid_else_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else' ':' block 'elif'")); @@ -25388,7 +25485,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 689)) // token='while' + (_keyword = _PyPegen_expect_token(p, 693)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -25419,7 +25516,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 689)) // token='while' + (a = _PyPegen_expect_token(p, 693)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -25478,13 +25575,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 699)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -25519,13 +25616,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 694)) // token='for' + (a = _PyPegen_expect_token(p, 698)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword = _PyPegen_expect_token(p, 695)) // token='in' + (_keyword = _PyPegen_expect_token(p, 699)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -25591,9 +25688,9 @@ invalid_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 699)) // token='def' + (a = _PyPegen_expect_token(p, 703)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -25650,9 +25747,9 @@ invalid_def_raw_rule(Parser *p) asdl_stmt_seq* block_var; expr_ty name_var; if ( - (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 702), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 699)) // token='def' + (_keyword = _PyPegen_expect_token(p, 703)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -25716,7 +25813,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 701)) // token='class' + (_keyword = _PyPegen_expect_token(p, 705)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -25755,7 +25852,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 701)) // token='class' + (a = _PyPegen_expect_token(p, 705)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -27015,7 +27112,7 @@ invalid_arithmetic_rule(Parser *p) && (_tmp_153_var = _tmp_153_rule(p)) // '+' | '-' | '*' | '/' | '%' | '//' | '@' && - (a = _PyPegen_expect_token(p, 703)) // token='not' + (a = _PyPegen_expect_token(p, 707)) // token='not' && (b = inversion_rule(p)) // inversion ) @@ -27064,7 +27161,7 @@ invalid_factor_rule(Parser *p) if ( (_tmp_154_var = _tmp_154_rule(p)) // '+' | '-' | '~' && - (a = _PyPegen_expect_token(p, 703)) // token='not' + (a = _PyPegen_expect_token(p, 707)) // token='not' && (b = factor_rule(p)) // factor ) @@ -27411,7 +27508,7 @@ _tmp_5_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_5[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'import'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 634)) // token='import' + (_keyword = _PyPegen_expect_token(p, 638)) // token='import' ) { D(fprintf(stderr, "%*c+ _tmp_5[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'import'")); @@ -27430,7 +27527,7 @@ _tmp_5_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_5[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'from'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword = _PyPegen_expect_token(p, 637)) // token='from' ) { D(fprintf(stderr, "%*c+ _tmp_5[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'from'")); @@ -27468,7 +27565,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'def'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 699)) // token='def' + (_keyword = _PyPegen_expect_token(p, 703)) // token='def' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'def'")); @@ -27506,7 +27603,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -27544,7 +27641,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'class'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 701)) // token='class' + (_keyword = _PyPegen_expect_token(p, 705)) // token='class' ) { D(fprintf(stderr, "%*c+ _tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'class'")); @@ -27601,7 +27698,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'with'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 647)) // token='with' + (_keyword = _PyPegen_expect_token(p, 651)) // token='with' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'with'")); @@ -27620,7 +27717,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -27658,7 +27755,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'for'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 694)) // token='for' + (_keyword = _PyPegen_expect_token(p, 698)) // token='for' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'for'")); @@ -27677,7 +27774,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 698)) // token='async' + (_keyword = _PyPegen_expect_token(p, 702)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -27902,7 +27999,7 @@ _tmp_13_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 633)) // token='from' + (_keyword = _PyPegen_expect_token(p, 637)) // token='from' && (z = expression_rule(p)) // expression ) @@ -28424,7 +28521,7 @@ _tmp_22_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -34188,7 +34285,7 @@ _tmp_115_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_115[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 686)) // token='else' + (_keyword = _PyPegen_expect_token(p, 690)) // token='else' ) { D(fprintf(stderr, "%*c+ _tmp_115[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); @@ -35927,7 +36024,7 @@ _tmp_143_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_143[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='except' + (_keyword = _PyPegen_expect_token(p, 681)) // token='except' ) { D(fprintf(stderr, "%*c+ _tmp_143[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); @@ -35946,7 +36043,7 @@ _tmp_143_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_143[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 673)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 677)) // token='finally' ) { D(fprintf(stderr, "%*c+ _tmp_143[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); @@ -37169,7 +37266,7 @@ _tmp_163_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 682)) // token='if' + (_keyword = _PyPegen_expect_token(p, 686)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -37827,7 +37924,7 @@ _tmp_176_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 680)) // token='as' + (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) From 6ef06fad84244261c695ec337c7d2734277054db Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 6 Jun 2025 14:52:48 +0300 Subject: [PATCH 491/989] gh-135120: Add test.support.subTests() (GH-135121) --- Lib/test/support/__init__.py | 25 ++ Lib/test/test_http_cookiejar.py | 187 ++++---- Lib/test/test_ntpath.py | 20 +- Lib/test/test_posixpath.py | 18 +- Lib/test/test_urlparse.py | 398 +++++++++--------- ...-06-04-13-07-44.gh-issue-135120.NapnZT.rst | 1 + 6 files changed, 307 insertions(+), 342 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-04-13-07-44.gh-issue-135120.NapnZT.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index f9b66b88d3d05c..48e74adcce3bb7 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -945,6 +945,31 @@ def check_sizeof(test, o, size): % (type(o), result, size) test.assertEqual(result, size, msg) +def subTests(arg_names, arg_values, /, *, _do_cleanups=False): + """Run multiple subtests with different parameters. + """ + single_param = False + if isinstance(arg_names, str): + arg_names = arg_names.replace(',',' ').split() + if len(arg_names) == 1: + single_param = True + arg_values = tuple(arg_values) + def decorator(func): + if isinstance(func, type): + raise TypeError('subTests() can only decorate methods, not classes') + @functools.wraps(func) + def wrapper(self, /, *args, **kwargs): + for values in arg_values: + if single_param: + values = (values,) + subtest_kwargs = dict(zip(arg_names, values)) + with self.subTest(**subtest_kwargs): + func(self, *args, **kwargs, **subtest_kwargs) + if _do_cleanups: + self.doCleanups() + return wrapper + return decorator + #======================================================================= # Decorator/context manager for running a code in a different locale, # correctly resetting it afterwards. diff --git a/Lib/test/test_http_cookiejar.py b/Lib/test/test_http_cookiejar.py index 6bc33b15ec32e9..04cb440cd4ccf6 100644 --- a/Lib/test/test_http_cookiejar.py +++ b/Lib/test/test_http_cookiejar.py @@ -4,6 +4,7 @@ import stat import sys import re +from test import support from test.support import os_helper from test.support import warnings_helper import time @@ -105,8 +106,7 @@ def test_http2time_formats(self): self.assertEqual(http2time(s.lower()), test_t, s.lower()) self.assertEqual(http2time(s.upper()), test_t, s.upper()) - def test_http2time_garbage(self): - for test in [ + @support.subTests('test', [ '', 'Garbage', 'Mandag 16. September 1996', @@ -121,10 +121,9 @@ def test_http2time_garbage(self): '08-01-3697739', '09 Feb 19942632 22:23:32 GMT', 'Wed, 09 Feb 1994834 22:23:32 GMT', - ]: - self.assertIsNone(http2time(test), - "http2time(%s) is not None\n" - "http2time(test) %s" % (test, http2time(test))) + ]) + def test_http2time_garbage(self, test): + self.assertIsNone(http2time(test)) def test_http2time_redos_regression_actually_completes(self): # LOOSE_HTTP_DATE_RE was vulnerable to malicious input which caused catastrophic backtracking (REDoS). @@ -149,9 +148,7 @@ def parse_date(text): self.assertEqual(parse_date("1994-02-03 19:45:29 +0530"), (1994, 2, 3, 14, 15, 29)) - def test_iso2time_formats(self): - # test iso2time for supported dates. - tests = [ + @support.subTests('s', [ '1994-02-03 00:00:00 -0000', # ISO 8601 format '1994-02-03 00:00:00 +0000', # ISO 8601 format '1994-02-03 00:00:00', # zone is optional @@ -164,16 +161,15 @@ def test_iso2time_formats(self): # A few tests with extra space at various places ' 1994-02-03 ', ' 1994-02-03T00:00:00 ', - ] - + ]) + def test_iso2time_formats(self, s): + # test iso2time for supported dates. test_t = 760233600 # assume broken POSIX counting of seconds - for s in tests: - self.assertEqual(iso2time(s), test_t, s) - self.assertEqual(iso2time(s.lower()), test_t, s.lower()) - self.assertEqual(iso2time(s.upper()), test_t, s.upper()) + self.assertEqual(iso2time(s), test_t, s) + self.assertEqual(iso2time(s.lower()), test_t, s.lower()) + self.assertEqual(iso2time(s.upper()), test_t, s.upper()) - def test_iso2time_garbage(self): - for test in [ + @support.subTests('test', [ '', 'Garbage', 'Thursday, 03-Feb-94 00:00:00 GMT', @@ -186,9 +182,9 @@ def test_iso2time_garbage(self): '01-01-1980 00:00:62', '01-01-1980T00:00:62', '19800101T250000Z', - ]: - self.assertIsNone(iso2time(test), - "iso2time(%r)" % test) + ]) + def test_iso2time_garbage(self, test): + self.assertIsNone(iso2time(test)) def test_iso2time_performance_regression(self): # If ISO_DATE_RE regresses to quadratic complexity, this test will take a very long time to succeed. @@ -199,24 +195,23 @@ def test_iso2time_performance_regression(self): class HeaderTests(unittest.TestCase): - def test_parse_ns_headers(self): - # quotes should be stripped - expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] - for hdr in [ + @support.subTests('hdr', [ 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', - ]: - self.assertEqual(parse_ns_headers([hdr]), expected) - - def test_parse_ns_headers_version(self): - + ]) + def test_parse_ns_headers(self, hdr): # quotes should be stripped - expected = [[('foo', 'bar'), ('version', '1')]] - for hdr in [ + expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] + self.assertEqual(parse_ns_headers([hdr]), expected) + + @support.subTests('hdr', [ 'foo=bar; version="1"', 'foo=bar; Version="1"', - ]: - self.assertEqual(parse_ns_headers([hdr]), expected) + ]) + def test_parse_ns_headers_version(self, hdr): + # quotes should be stripped + expected = [[('foo', 'bar'), ('version', '1')]] + self.assertEqual(parse_ns_headers([hdr]), expected) def test_parse_ns_headers_special_names(self): # names such as 'expires' are not special in first name=value pair @@ -226,8 +221,7 @@ def test_parse_ns_headers_special_names(self): expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", "0")]] self.assertEqual(parse_ns_headers([hdr]), expected) - def test_join_header_words(self): - for src, expected in [ + @support.subTests('src,expected', [ ([[("foo", None), ("bar", "baz")]], "foo; bar=baz"), (([]), ""), (([[]]), ""), @@ -237,12 +231,11 @@ def test_join_header_words(self): 'n; foo="foo;_", bar=foo_bar'), ([[("n", "m"), ("foo", None)], [("bar", "foo_bar")]], 'n=m; foo, bar=foo_bar'), - ]: - with self.subTest(src=src): - self.assertEqual(join_header_words(src), expected) + ]) + def test_join_header_words(self, src, expected): + self.assertEqual(join_header_words(src), expected) - def test_split_header_words(self): - tests = [ + @support.subTests('arg,expect', [ ("foo", [[("foo", None)]]), ("foo=bar", [[("foo", "bar")]]), (" foo ", [[("foo", None)]]), @@ -259,24 +252,22 @@ def test_split_header_words(self): (r'foo; bar=baz, spam=, foo="\,\;\"", bar= ', [[("foo", None), ("bar", "baz")], [("spam", "")], [("foo", ',;"')], [("bar", "")]]), - ] - - for arg, expect in tests: - try: - result = split_header_words([arg]) - except: - import traceback, io - f = io.StringIO() - traceback.print_exc(None, f) - result = "(error -- traceback follows)\n\n%s" % f.getvalue() - self.assertEqual(result, expect, """ + ]) + def test_split_header_words(self, arg, expect): + try: + result = split_header_words([arg]) + except: + import traceback, io + f = io.StringIO() + traceback.print_exc(None, f) + result = "(error -- traceback follows)\n\n%s" % f.getvalue() + self.assertEqual(result, expect, """ When parsing: '%s' Expected: '%s' Got: '%s' """ % (arg, expect, result)) - def test_roundtrip(self): - tests = [ + @support.subTests('arg,expect', [ ("foo", "foo"), ("foo=bar", "foo=bar"), (" foo ", "foo"), @@ -309,12 +300,11 @@ def test_roundtrip(self): ('n; foo="foo;_", bar="foo,_"', 'n; foo="foo;_", bar="foo,_"'), - ] - - for arg, expect in tests: - input = split_header_words([arg]) - res = join_header_words(input) - self.assertEqual(res, expect, """ + ]) + def test_roundtrip(self, arg, expect): + input = split_header_words([arg]) + res = join_header_words(input) + self.assertEqual(res, expect, """ When parsing: '%s' Expected: '%s' Got: '%s' @@ -516,14 +506,7 @@ class CookieTests(unittest.TestCase): ## just the 7 special TLD's listed in their spec. And folks rely on ## that... - def test_domain_return_ok(self): - # test optimization: .domain_return_ok() should filter out most - # domains in the CookieJar before we try to access them (because that - # may require disk access -- in particular, with MSIECookieJar) - # This is only a rough check for performance reasons, so it's not too - # critical as long as it's sufficiently liberal. - pol = DefaultCookiePolicy() - for url, domain, ok in [ + @support.subTests('url,domain,ok', [ ("http://foo.bar.com/", "blah.com", False), ("http://foo.bar.com/", "rhubarb.blah.com", False), ("http://foo.bar.com/", "rhubarb.foo.bar.com", False), @@ -543,11 +526,18 @@ def test_domain_return_ok(self): ("http://foo/", ".local", True), ("http://barfoo.com", ".foo.com", False), ("http://barfoo.com", "foo.com", False), - ]: - request = urllib.request.Request(url) - r = pol.domain_return_ok(domain, request) - if ok: self.assertTrue(r) - else: self.assertFalse(r) + ]) + def test_domain_return_ok(self, url, domain, ok): + # test optimization: .domain_return_ok() should filter out most + # domains in the CookieJar before we try to access them (because that + # may require disk access -- in particular, with MSIECookieJar) + # This is only a rough check for performance reasons, so it's not too + # critical as long as it's sufficiently liberal. + pol = DefaultCookiePolicy() + request = urllib.request.Request(url) + r = pol.domain_return_ok(domain, request) + if ok: self.assertTrue(r) + else: self.assertFalse(r) def test_missing_value(self): # missing = sign in Cookie: header is regarded by Mozilla as a missing @@ -581,10 +571,7 @@ def test_missing_value(self): self.assertEqual(interact_netscape(c, "http://www.acme.com/foo/"), '"spam"; eggs') - def test_rfc2109_handling(self): - # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, - # dependent on policy settings - for rfc2109_as_netscape, rfc2965, version in [ + @support.subTests('rfc2109_as_netscape,rfc2965,version', [ # default according to rfc2965 if not explicitly specified (None, False, 0), (None, True, 1), @@ -593,24 +580,27 @@ def test_rfc2109_handling(self): (False, True, 1), (True, False, 0), (True, True, 0), - ]: - policy = DefaultCookiePolicy( - rfc2109_as_netscape=rfc2109_as_netscape, - rfc2965=rfc2965) - c = CookieJar(policy) - interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") - try: - cookie = c._cookies["www.example.com"]["/"]["ni"] - except KeyError: - self.assertIsNone(version) # didn't expect a stored cookie - else: - self.assertEqual(cookie.version, version) - # 2965 cookies are unaffected - interact_2965(c, "http://www.example.com/", - "foo=bar; Version=1") - if rfc2965: - cookie2965 = c._cookies["www.example.com"]["/"]["foo"] - self.assertEqual(cookie2965.version, 1) + ]) + def test_rfc2109_handling(self, rfc2109_as_netscape, rfc2965, version): + # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, + # dependent on policy settings + policy = DefaultCookiePolicy( + rfc2109_as_netscape=rfc2109_as_netscape, + rfc2965=rfc2965) + c = CookieJar(policy) + interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") + try: + cookie = c._cookies["www.example.com"]["/"]["ni"] + except KeyError: + self.assertIsNone(version) # didn't expect a stored cookie + else: + self.assertEqual(cookie.version, version) + # 2965 cookies are unaffected + interact_2965(c, "http://www.example.com/", + "foo=bar; Version=1") + if rfc2965: + cookie2965 = c._cookies["www.example.com"]["/"]["foo"] + self.assertEqual(cookie2965.version, 1) def test_ns_parser(self): c = CookieJar() @@ -778,8 +768,7 @@ def test_default_path_with_query(self): # Cookie is sent back to the same URI. self.assertEqual(interact_netscape(cj, uri), value) - def test_escape_path(self): - cases = [ + @support.subTests('arg,result', [ # quoted safe ("/foo%2f/bar", "/foo%2F/bar"), ("/foo%2F/bar", "/foo%2F/bar"), @@ -799,9 +788,9 @@ def test_escape_path(self): ("/foo/bar\u00fc", "/foo/bar%C3%BC"), # UTF-8 encoded # unicode ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"), # UTF-8 encoded - ] - for arg, result in cases: - self.assertEqual(escape_path(arg), result) + ]) + def test_escape_path(self, arg, result): + self.assertEqual(escape_path(arg), result) def test_request_path(self): # with parameters diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index b891d0734ca0a6..22f6403d482bc4 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -7,6 +7,7 @@ import unittest import warnings from ntpath import ALLOW_MISSING +from test import support from test.support import TestFailed, cpython_only, os_helper from test.support.os_helper import FakePath from test import test_genericpath @@ -78,24 +79,7 @@ def tester(fn, wantResult): def _parameterize(*parameters): - """Simplistic decorator to parametrize a test - - Runs the decorated test multiple times in subTest, with a value from - 'parameters' passed as an extra positional argument. - Calls doCleanups() after each run. - - Not for general use. Intended to avoid indenting for easier backports. - - See https://discuss.python.org/t/91827 for discussing generalizations. - """ - def _parametrize_decorator(func): - def _parameterized(self, *args, **kwargs): - for parameter in parameters: - with self.subTest(parameter): - func(self, *args, parameter, **kwargs) - self.doCleanups() - return _parameterized - return _parametrize_decorator + return support.subTests('kwargs', parameters, _do_cleanups=True) class NtpathTestCase(unittest.TestCase): diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index c45ce6d3ef7820..21f06712548d88 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -36,23 +36,7 @@ def skip_if_ABSTFN_contains_backslash(test): def _parameterize(*parameters): - """Simplistic decorator to parametrize a test - - Runs the decorated test multiple times in subTest, with a value from - 'parameters' passed as an extra positional argument. - Does *not* call doCleanups() after each run. - - Not for general use. Intended to avoid indenting for easier backports. - - See https://discuss.python.org/t/91827 for discussing generalizations. - """ - def _parametrize_decorator(func): - def _parameterized(self, *args, **kwargs): - for parameter in parameters: - with self.subTest(parameter): - func(self, *args, parameter, **kwargs) - return _parameterized - return _parametrize_decorator + return support.subTests('kwargs', parameters) class PosixPathTest(unittest.TestCase): diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index aabc360289a0d0..b2bde5a9b1d696 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -2,6 +2,7 @@ import unicodedata import unittest import urllib.parse +from test import support RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -156,27 +157,25 @@ def checkRoundtrips(self, url, parsed, split, url2=None): self.assertEqual(result3.hostname, result.hostname) self.assertEqual(result3.port, result.port) - def test_qsl(self): - for orig, expect in parse_qsl_test_cases: - result = urllib.parse.parse_qsl(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = [v for v in expect if len(v[1])] - result = urllib.parse.parse_qsl(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_qs(self): - for orig, expect in parse_qs_test_cases: - result = urllib.parse.parse_qs(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = {v: expect[v] - for v in expect if len(expect[v][0])} - result = urllib.parse.parse_qs(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_roundtrips(self): - str_cases = [ + @support.subTests('orig,expect', parse_qsl_test_cases) + def test_qsl(self, orig, expect): + result = urllib.parse.parse_qsl(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = [v for v in expect if len(v[1])] + result = urllib.parse.parse_qsl(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('orig,expect', parse_qs_test_cases) + def test_qs(self, orig, expect): + result = urllib.parse.parse_qs(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = {v: expect[v] + for v in expect if len(expect[v][0])} + result = urllib.parse.parse_qs(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,parsed,split', [ ('path/to/file', ('', '', 'path/to/file', '', '', ''), ('', '', 'path/to/file', '', '')), @@ -263,23 +262,21 @@ def test_roundtrips(self): ('sch_me:path/to/file', ('', '', 'sch_me:path/to/file', '', '', ''), ('', '', 'sch_me:path/to/file', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_cases += [ ('schème:path/to/file', ('', '', 'schème:path/to/file', '', '', ''), ('', '', 'schème:path/to/file', '', '')), - ] - for url, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split) - - def test_roundtrips_normalization(self): - str_cases = [ + ]) + def test_roundtrips(self, bytes, url, parsed, split): + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,url2,parsed,split', [ ('///path/to/file', '/path/to/file', ('', '', '/path/to/file', '', '', ''), @@ -300,22 +297,18 @@ def test_roundtrips_normalization(self): 'https:///tmp/junk.txt', ('https', '', '/tmp/junk.txt', '', '', ''), ('https', '', '/tmp/junk.txt', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - t[1].encode('ascii'), - tuple(x.encode('ascii') for x in t[2]), - tuple(x.encode('ascii') for x in t[3])) - bytes_cases = [_encode(x) for x in str_cases] - for url, url2, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split, url2) - - def test_http_roundtrips(self): - # urllib.parse.urlsplit treats 'http:' as an optimized special case, - # so we test both 'http:' and 'https:' in all the following. - # Three cheers for white box knowledge! - str_cases = [ + ]) + def test_roundtrips_normalization(self, bytes, url, url2, parsed, split): + if bytes: + url = str_encode(url) + url2 = str_encode(url2) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split, url2) + + @support.subTests('bytes', (False, True)) + @support.subTests('scheme', ('http', 'https')) + @support.subTests('url,parsed,split', [ ('://www.python.org', ('www.python.org', '', '', '', ''), ('www.python.org', '', '', '')), @@ -331,23 +324,20 @@ def test_http_roundtrips(self): ('://a/b/c/d;p?q#f', ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_schemes = ('http', 'https') - bytes_schemes = (b'http', b'https') - str_tests = str_schemes, str_cases - bytes_tests = bytes_schemes, bytes_cases - for schemes, test_cases in (str_tests, bytes_tests): - for scheme in schemes: - for url, parsed, split in test_cases: - url = scheme + url - parsed = (scheme,) + parsed - split = (scheme,) + split - self.checkRoundtrips(url, parsed, split) + ]) + def test_http_roundtrips(self, bytes, scheme, url, parsed, split): + # urllib.parse.urlsplit treats 'http:' as an optimized special case, + # so we test both 'http:' and 'https:' in all the following. + # Three cheers for white box knowledge! + if bytes: + scheme = str_encode(scheme) + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected, *, relroundtrip=True): with self.subTest(base=base, relurl=relurl): @@ -363,12 +353,13 @@ def checkJoin(self, base, relurl, expected, *, relroundtrip=True): relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb)) self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) - def test_unparse_parse(self): - str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] - bytes_cases = [x.encode('ascii') for x in str_cases] - for u in str_cases + bytes_cases: - self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) - self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) + @support.subTests('bytes', (False, True)) + @support.subTests('u', ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]) + def test_unparse_parse(self, bytes, u): + if bytes: + u = str_encode(u) + self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) + self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) def test_RFC1808(self): # "normal" cases from RFC 1808: @@ -695,8 +686,8 @@ def test_urljoins_relative_base(self): self.checkJoin('///b/c', '///w', '///w') self.checkJoin('///b/c', 'w', '///b/w') - def test_RFC2732(self): - str_cases = [ + @support.subTests('bytes', (False, True)) + @support.subTests('url,hostname,port', [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), @@ -727,26 +718,28 @@ def test_RFC2732(self): ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), ('http://[::ffff:12.34.56.78]:/foo/', '::ffff:12.34.56.78', None), - ] - def _encode(t): - return t[0].encode('ascii'), t[1].encode('ascii'), t[2] - bytes_cases = [_encode(x) for x in str_cases] - for url, hostname, port in str_cases + bytes_cases: - urlparsed = urllib.parse.urlparse(url) - self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) - - str_cases = [ + ]) + def test_RFC2732(self, bytes, url, hostname, port): + if bytes: + url = str_encode(url) + hostname = str_encode(hostname) + urlparsed = urllib.parse.urlparse(url) + self.assertEqual((urlparsed.hostname, urlparsed.port), (hostname, port)) + + @support.subTests('bytes', (False, True)) + @support.subTests('invalid_url', [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', - 'http://[::ffff:12.34.56.78'] - bytes_cases = [x.encode('ascii') for x in str_cases] - for invalid_url in str_cases + bytes_cases: - self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) - - def test_urldefrag(self): - str_cases = [ + 'http://[::ffff:12.34.56.78']) + def test_RFC2732_invalid(self, bytes, invalid_url): + if bytes: + invalid_url = str_encode(invalid_url) + self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,defrag,frag', [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', ''), ('http://python.org/#frag', 'http://python.org/', 'frag'), @@ -770,18 +763,18 @@ def test_urldefrag(self): ('http:?q#f', 'http:?q', 'f'), ('//a/b/c;p?q#f', '//a/b/c;p?q', 'f'), ('://a/b/c;p?q#f', '://a/b/c;p?q', 'f'), - ] - def _encode(t): - return type(t)(x.encode('ascii') for x in t) - bytes_cases = [_encode(x) for x in str_cases] - for url, defrag, frag in str_cases + bytes_cases: - with self.subTest(url): - result = urllib.parse.urldefrag(url) - hash = '#' if isinstance(url, str) else b'#' - self.assertEqual(result.geturl(), url.rstrip(hash)) - self.assertEqual(result, (defrag, frag)) - self.assertEqual(result.url, defrag) - self.assertEqual(result.fragment, frag) + ]) + def test_urldefrag(self, bytes, url, defrag, frag): + if bytes: + url = str_encode(url) + defrag = str_encode(defrag) + frag = str_encode(frag) + result = urllib.parse.urldefrag(url) + hash = '#' if isinstance(url, str) else b'#' + self.assertEqual(result.geturl(), url.rstrip(hash)) + self.assertEqual(result, (defrag, frag)) + self.assertEqual(result.url, defrag) + self.assertEqual(result.fragment, frag) def test_urlsplit_scoped_IPv6(self): p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') @@ -981,42 +974,35 @@ def test_urlsplit_strip_url(self): self.assertEqual(p.scheme, "https") self.assertEqual(p.geturl(), "https://www.python.org/") - def test_attributes_bad_port(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('port', ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६")) + def test_attributes_bad_port(self, bytes, parse, port): """Check handling of invalid ports.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"): - with self.subTest(bytes=bytes, parse=parse, port=port): - netloc = "www.example.net:" + port - url = "http://" + netloc + "/" - if bytes: - if netloc.isascii() and port.isascii(): - netloc = netloc.encode("ascii") - url = url.encode("ascii") - else: - continue - p = parse(url) - self.assertEqual(p.netloc, netloc) - with self.assertRaises(ValueError): - p.port + netloc = "www.example.net:" + port + url = "http://" + netloc + "/" + if bytes: + if not (netloc.isascii() and port.isascii()): + self.skipTest('non-ASCII bytes') + netloc = str_encode(netloc) + url = str_encode(url) + p = parse(url) + self.assertEqual(p.netloc, netloc) + with self.assertRaises(ValueError): + p.port - def test_attributes_bad_scheme(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('scheme', (".", "+", "-", "0", "http&", "६http")) + def test_attributes_bad_scheme(self, bytes, parse, scheme): """Check handling of invalid schemes.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for scheme in (".", "+", "-", "0", "http&", "६http"): - with self.subTest(bytes=bytes, parse=parse, scheme=scheme): - url = scheme + "://www.example.net" - if bytes: - if url.isascii(): - url = url.encode("ascii") - else: - continue - p = parse(url) - if bytes: - self.assertEqual(p.scheme, b"") - else: - self.assertEqual(p.scheme, "") + url = scheme + "://www.example.net" + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = url.encode("ascii") + p = parse(url) + self.assertEqual(p.scheme, b"" if bytes else "") def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it @@ -1128,24 +1114,21 @@ def test_anyscheme(self): self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) - def test_default_scheme(self): + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_default_scheme(self, func): # Exercise the scheme parameter of urlparse() and urlsplit() - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - with self.subTest(function=func): - result = func("http://example.net/", "ftp") - self.assertEqual(result.scheme, "http") - result = func(b"http://example.net/", b"ftp") - self.assertEqual(result.scheme, b"http") - self.assertEqual(func("path", "ftp").scheme, "ftp") - self.assertEqual(func("path", scheme="ftp").scheme, "ftp") - self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") - self.assertEqual(func("path").scheme, "") - self.assertEqual(func(b"path").scheme, b"") - self.assertEqual(func(b"path", "").scheme, b"") - - def test_parse_fragments(self): - # Exercise the allow_fragments parameter of urlparse() and urlsplit() - tests = ( + result = func("http://example.net/", "ftp") + self.assertEqual(result.scheme, "http") + result = func(b"http://example.net/", b"ftp") + self.assertEqual(result.scheme, b"http") + self.assertEqual(func("path", "ftp").scheme, "ftp") + self.assertEqual(func("path", scheme="ftp").scheme, "ftp") + self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") + self.assertEqual(func("path").scheme, "") + self.assertEqual(func(b"path").scheme, b"") + self.assertEqual(func(b"path", "").scheme, b"") + + @support.subTests('url,attr,expected_frag', ( ("http:#frag", "path", "frag"), ("//example.net#frag", "path", "frag"), ("index.html#frag", "path", "frag"), @@ -1156,24 +1139,24 @@ def test_parse_fragments(self): ("//abc#@frag", "path", "@frag"), ("//abc:80#@frag", "path", "@frag"), ("//abc#@frag:80", "path", "@frag:80"), - ) - for url, attr, expected_frag in tests: - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - if attr == "params" and func is urllib.parse.urlsplit: - attr = "path" - with self.subTest(url=url, function=func): - result = func(url, allow_fragments=False) - self.assertEqual(result.fragment, "") - self.assertEndsWith(getattr(result, attr), - "#" + expected_frag) - self.assertEqual(func(url, "", False).fragment, "") - - result = func(url, allow_fragments=True) - self.assertEqual(result.fragment, expected_frag) - self.assertNotEndsWith(getattr(result, attr), expected_frag) - self.assertEqual(func(url, "", True).fragment, - expected_frag) - self.assertEqual(func(url).fragment, expected_frag) + )) + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_parse_fragments(self, url, attr, expected_frag, func): + # Exercise the allow_fragments parameter of urlparse() and urlsplit() + if attr == "params" and func is urllib.parse.urlsplit: + attr = "path" + result = func(url, allow_fragments=False) + self.assertEqual(result.fragment, "") + self.assertEndsWith(getattr(result, attr), + "#" + expected_frag) + self.assertEqual(func(url, "", False).fragment, "") + + result = func(url, allow_fragments=True) + self.assertEqual(result.fragment, expected_frag) + self.assertNotEndsWith(getattr(result, attr), expected_frag) + self.assertEqual(func(url, "", True).fragment, + expected_frag) + self.assertEqual(func(url).fragment, expected_frag) def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes @@ -1199,7 +1182,14 @@ def test_mixed_types_rejected(self): with self.assertRaisesRegex(TypeError, "Cannot mix str"): urllib.parse.urljoin(b"http://python.org", "http://python.org") - def _check_result_type(self, str_type): + @support.subTests('result_type', [ + urllib.parse.DefragResult, + urllib.parse.SplitResult, + urllib.parse.ParseResult, + ]) + def test_result_pairs(self, result_type): + # Check encoding and decoding between result pairs + str_type = result_type num_args = len(str_type._fields) bytes_type = str_type._encoded_counterpart self.assertIs(bytes_type._decoded_counterpart, str_type) @@ -1224,16 +1214,6 @@ def _check_result_type(self, str_type): self.assertEqual(str_result.encode(encoding, errors), bytes_args) self.assertEqual(str_result.encode(encoding, errors), bytes_result) - def test_result_pairs(self): - # Check encoding and decoding between result pairs - result_types = [ - urllib.parse.DefragResult, - urllib.parse.SplitResult, - urllib.parse.ParseResult, - ] - for result_type in result_types: - self._check_result_type(result_type) - def test_parse_qs_encoding(self): result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") self.assertEqual(result, {'key': ['\u0141\xE9']}) @@ -1265,8 +1245,7 @@ def test_parse_qsl_max_num_fields(self): urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10) urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10) - def test_parse_qs_separator(self): - parse_qs_semicolon_cases = [ + @support.subTests('orig,expect', [ (";", {}), (";;", {}), (";a=b", {'a': ['b']}), @@ -1277,17 +1256,14 @@ def test_parse_qs_separator(self): (b";a=b", {b'a': [b'b']}), (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1;a=2", {b'a': [b'1', b'2']}), - ] - for orig, expect in parse_qs_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qs(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qs(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) - - - def test_parse_qsl_separator(self): - parse_qsl_semicolon_cases = [ + ]) + def test_parse_qs_separator(self, orig, expect): + result = urllib.parse.parse_qs(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qs(orig, separator=b';') + self.assertEqual(result_bytes, expect) + + @support.subTests('orig,expect', [ (";", []), (";;", []), (";a=b", [('a', 'b')]), @@ -1298,13 +1274,12 @@ def test_parse_qsl_separator(self): (b";a=b", [(b'a', b'b')]), (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), - ] - for orig, expect in parse_qsl_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qsl(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qsl(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) + ]) + def test_parse_qsl_separator(self, orig, expect): + result = urllib.parse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qsl(orig, separator=b';') + self.assertEqual(result_bytes, expect) def test_parse_qsl_bytes(self): self.assertEqual(urllib.parse.parse_qsl(b'a=b'), [(b'a', b'b')]) @@ -1695,11 +1670,12 @@ def test_to_bytes(self): self.assertRaises(UnicodeError, urllib.parse._to_bytes, 'http://www.python.org/medi\u00e6val') - def test_unwrap(self): - for wrapped_url in ('', '', - 'URL:scheme://host/path', 'scheme://host/path'): - url = urllib.parse.unwrap(wrapped_url) - self.assertEqual(url, 'scheme://host/path') + @support.subTests('wrapped_url', + ('', '', + 'URL:scheme://host/path', 'scheme://host/path')) + def test_unwrap(self, wrapped_url): + url = urllib.parse.unwrap(wrapped_url) + self.assertEqual(url, 'scheme://host/path') class DeprecationTest(unittest.TestCase): @@ -1780,5 +1756,11 @@ def test_to_bytes_deprecation(self): 'urllib.parse.to_bytes() is deprecated as of 3.8') +def str_encode(s): + return s.encode('ascii') + +def tuple_encode(t): + return tuple(str_encode(x) for x in t) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Tests/2025-06-04-13-07-44.gh-issue-135120.NapnZT.rst b/Misc/NEWS.d/next/Tests/2025-06-04-13-07-44.gh-issue-135120.NapnZT.rst new file mode 100644 index 00000000000000..772173774b1ac1 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-04-13-07-44.gh-issue-135120.NapnZT.rst @@ -0,0 +1 @@ +Add :func:`!test.support.subTests`. From e004cf8fd5c006a7a1c60807a03066f4c43452e5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Jun 2025 14:11:49 +0200 Subject: [PATCH 492/989] gh-134993: Add os.lstat() to os.supports_dir_fd (#135188) --- Lib/os.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/os.py b/Lib/os.py index 266e40b56f6c81..643a7b2f58176a 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -118,6 +118,7 @@ def _add(str, fn): _add("HAVE_FCHMODAT", "chmod") _add("HAVE_FCHOWNAT", "chown") _add("HAVE_FSTATAT", "stat") + _add("HAVE_LSTAT", "lstat") _add("HAVE_FUTIMESAT", "utime") _add("HAVE_LINKAT", "link") _add("HAVE_MKDIRAT", "mkdir") From 82415acf62f039b384e7ffdc5a24ff1f1207cfe5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Jun 2025 14:22:14 +0200 Subject: [PATCH 493/989] gh-134036: Update test_syntax for gh-133999 (#135204) --- Lib/test/test_syntax.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 965c096475f590..13aaba405e3204 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1436,17 +1436,17 @@ >>> try: pass ... except TypeError as name: raise from None Traceback (most recent call last): - SyntaxError: invalid syntax + SyntaxError: did you forget an expression between 'raise' and 'from'? >>> try: pass ... except* TypeError as name: raise from None Traceback (most recent call last): - SyntaxError: invalid syntax + SyntaxError: did you forget an expression between 'raise' and 'from'? >>> match 1: ... case 1 | 2 as abc: raise from None Traceback (most recent call last): - SyntaxError: invalid syntax + SyntaxError: did you forget an expression between 'raise' and 'from'? Ensure that early = are not matched by the parser as invalid comparisons >>> f(2, 4, x=34); 1 $ 2 From 39859fcac5c8335ca9c1bddc5945a7d3e934aebc Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 6 Jun 2025 09:23:07 -0300 Subject: [PATCH 494/989] Fix versionadded directive rendering in c-api/arg.rst (#135199) --- Doc/c-api/arg.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 3bbc990b6329c0..49dbc8d71cce62 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -685,6 +685,7 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] From 343182853f19a42c0ba8980d3104076a8c7bcfe7 Mon Sep 17 00:00:00 2001 From: Jean-Louis GUENEGO Date: Fri, 6 Jun 2025 15:16:12 +0200 Subject: [PATCH 495/989] feat(docs): type fix - apply pep8 by using docstring instead of comment in the doc. (#135181) Giving the right example incitates the tutorial readers to do the same in the future. --- Doc/tutorial/modules.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/tutorial/modules.rst b/Doc/tutorial/modules.rst index de7aa0e2342946..47bf7547b4ae1d 100644 --- a/Doc/tutorial/modules.rst +++ b/Doc/tutorial/modules.rst @@ -27,14 +27,16 @@ called :file:`fibo.py` in the current directory with the following contents:: # Fibonacci numbers module - def fib(n): # write Fibonacci series up to n + def fib(n): + """Write Fibonacci series up to n.""" a, b = 0, 1 while a < n: print(a, end=' ') a, b = b, a+b print() - def fib2(n): # return Fibonacci series up to n + def fib2(n): + """Return Fibonacci series up to n.""" result = [] a, b = 0, 1 while a < n: From e413e2671916ed8f4513af92830f4fb2bc59b1d2 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 6 Jun 2025 15:51:00 +0200 Subject: [PATCH 496/989] gh-134891: Add PyUnstable_Unicode_GET_CACHED_HASH (GH-134892) --- Doc/c-api/unicode.rst | 16 ++++++++++++++++ Doc/whatsnew/3.15.rst | 4 ++++ Include/cpython/unicodeobject.h | 11 +++++++++++ Lib/test/test_capi/test_unicode.py | 14 ++++++++++++++ ...025-05-29-16-56-23.gh-issue-134891.7eKO8U.rst | 2 ++ Modules/_testcapi/unicode.c | 7 +++++++ Objects/unicodeobject.c | 6 +----- 7 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-05-29-16-56-23.gh-issue-134891.7eKO8U.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 45f50ba5f97d26..07fdcfd9729ee0 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -191,6 +191,22 @@ access to internal read-only data of Unicode objects: .. versionadded:: 3.2 +.. c:function:: Py_hash_t PyUnstable_Unicode_GET_CACHED_HASH(PyObject *str) + + If the hash of *str*, as returned by :c:func:`PyObject_Hash`, has been + cached and is immediately available, return it. + Otherwise, return ``-1`` *without* setting an exception. + + If *str* is not a string (that is, if ``PyUnicode_Check(obj)`` + is false), the behavior is undefined. + + This function never fails with an exception. + + Note that there are no guarantees on when an object's hash is cached, + and the (non-)existence of a cached hash does not imply that the string has + any other properties. + + Unicode Character Properties """""""""""""""""""""""""""" diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index daf3e8fb6c2c2b..e5bccf17eb63e2 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -304,6 +304,10 @@ New features input string contains non-ASCII characters. (Contributed by Victor Stinner in :gh:`133968`.) +* Add :c:type:`PyUnstable_Unicode_GET_CACHED_HASH` to get the cached hash of + a string. See the documentation for caveats. + (Contributed by Petr Viktorin in :gh:`131510`) + Porting to Python 3.15 ---------------------- diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 3d0414f5291fe4..7c1aac9696dec9 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -300,6 +300,17 @@ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) { } #define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op)) +/* Returns the cached hash, or -1 if not cached yet. */ +static inline Py_hash_t +PyUnstable_Unicode_GET_CACHED_HASH(PyObject *op) { + assert(PyUnicode_Check(op)); +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&_PyASCIIObject_CAST(op)->hash); +#else + return _PyASCIIObject_CAST(op)->hash; +#endif +} + /* Write into the canonical representation, this function does not do any sanity checks and is intended for usage in loops. The caller should cache the kind and data pointers obtained from other function calls. diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index c8be4f3faa9483..6a9c60f3a6d75e 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1739,6 +1739,20 @@ def test_pep393_utf8_caching_bug(self): # Check that the second call returns the same result self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_GET_CACHED_HASH(self): + from _testcapi import unicode_GET_CACHED_HASH + content_bytes = b'some new string' + # avoid parser interning & constant folding + obj = str(content_bytes, 'ascii') + # impl detail: fresh strings do not have cached hash + self.assertEqual(unicode_GET_CACHED_HASH(obj), -1) + # impl detail: adding string to a dict caches its hash + {obj: obj} + # impl detail: ASCII string hashes are equal to bytes ones + self.assertEqual(unicode_GET_CACHED_HASH(obj), hash(content_bytes)) + class PyUnicodeWriterTest(unittest.TestCase): def create_writer(self, size): diff --git a/Misc/NEWS.d/next/C_API/2025-05-29-16-56-23.gh-issue-134891.7eKO8U.rst b/Misc/NEWS.d/next/C_API/2025-05-29-16-56-23.gh-issue-134891.7eKO8U.rst new file mode 100644 index 00000000000000..db30d5e9a94584 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-29-16-56-23.gh-issue-134891.7eKO8U.rst @@ -0,0 +1,2 @@ +Add :c:type:`PyUnstable_Unicode_GET_CACHED_HASH` to get the cached hash of a +string. diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index e70f5c68bc3b69..203282dd53dd0a 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -220,6 +220,12 @@ unicode_copycharacters(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", to_copy, copied); } +static PyObject* +unicode_GET_CACHED_HASH(PyObject *self, PyObject *arg) +{ + return PyLong_FromSsize_t(PyUnstable_Unicode_GET_CACHED_HASH(arg)); +} + // --- PyUnicodeWriter type ------------------------------------------------- @@ -570,6 +576,7 @@ static PyMethodDef TestMethods[] = { {"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, + {"unicode_GET_CACHED_HASH", unicode_GET_CACHED_HASH, METH_O}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5611f839627a2e..5c2308a012142a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -167,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -static inline Py_hash_t PyUnicode_HASH(PyObject *op) -{ - assert(_PyUnicode_CHECK(op)); - return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); -} +#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { From 1adca08d658ee2d520f3193960eaf3ae2ead1cef Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 6 Jun 2025 16:08:58 +0200 Subject: [PATCH 497/989] gh-134160: Use PyModuleDef.m_free in the example module xxlimited (GH-135174) Co-authored-by: neonene <53406459+neonene@users.noreply.github.com> --- Modules/xxlimited.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Modules/xxlimited.c b/Modules/xxlimited.c index 26ac35734fb060..0480fb0849876b 100644 --- a/Modules/xxlimited.c +++ b/Modules/xxlimited.c @@ -424,6 +424,13 @@ xx_clear(PyObject *module) return 0; } +static void +xx_free(void *module) +{ + // allow xx_modexec to omit calling xx_clear on error + (void)xx_clear((PyObject *)module); +} + static struct PyModuleDef xxmodule = { PyModuleDef_HEAD_INIT, .m_name = "xxlimited", @@ -433,9 +440,7 @@ static struct PyModuleDef xxmodule = { .m_slots = xx_slots, .m_traverse = xx_traverse, .m_clear = xx_clear, - /* m_free is not necessary here: xx_clear clears all references, - * and the module state is deallocated along with the module. - */ + .m_free = xx_free, }; From b22b964a5c245e13ae0439c5efbb8984acf607ab Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 6 Jun 2025 16:51:40 +0200 Subject: [PATCH 498/989] gh-57089: Note _layout_ in the bitfield docs (GH-134148) Co-authored-by: Meador Inge Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Doc/library/ctypes.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 8e74c6c9dee744..2ee4450698a107 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -714,10 +714,16 @@ item in the :attr:`~Structure._fields_` tuples:: ... ("second_16", c_int, 16)] ... >>> print(Int.first_16) - + >>> print(Int.second_16) - - >>> + + +It is important to note that bit field allocation and layout in memory are not +defined as a C standard; their implementation is compiler-specific. +By default, Python will attempt to match the behavior of a "native" compiler +for the current platform. +See the :attr:`~Structure._layout_` attribute for details on the default +behavior and how to change it. .. _ctypes-arrays: From 62b3d2d443785c4ea5262edb4f9f7040440f9463 Mon Sep 17 00:00:00 2001 From: Tan Long Date: Fri, 6 Jun 2025 22:52:41 +0800 Subject: [PATCH 499/989] gh-133390: Support SQL keyword completion for sqlite3 CLI (#133393) --- Doc/whatsnew/3.15.rst | 7 ++ Lib/sqlite3/__main__.py | 11 +-- Lib/sqlite3/_completer.py | 42 ++++++++ Lib/test/test_sqlite3/test_cli.py | 98 +++++++++++++++++++ Misc/ACKS | 1 + ...-05-05-03-14-08.gh-issue-133390.AuTggn.rst | 1 + Modules/_sqlite/module.c | 39 ++++++++ 7 files changed, 193 insertions(+), 6 deletions(-) create mode 100644 Lib/sqlite3/_completer.py create mode 100644 Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e5bccf17eb63e2..2f8335a895c70b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -134,6 +134,13 @@ shelve (Contributed by Andrea Oliveri in :gh:`134004`.) +sqlite3 +------- + +* Support SQL keyword completion in the :mod:`sqlite3` command-line interface. + (Contributed by Long Tan in :gh:`133393`.) + + ssl --- diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index c2fa23c46cf990..9e74b49ee828bc 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -12,6 +12,8 @@ from textwrap import dedent from _colorize import get_theme, theme_no_color +from ._completer import completer + def execute(c, sql, suppress_errors=True, theme=theme_no_color): """Helper that wraps execution of SQL code. @@ -136,12 +138,9 @@ def main(*args): execute(con, args.sql, suppress_errors=False, theme=theme) else: # No SQL provided; start the REPL. - console = SqliteInteractiveConsole(con, use_color=True) - try: - import readline # noqa: F401 - except ImportError: - pass - console.interact(banner, exitmsg="") + with completer(): + console = SqliteInteractiveConsole(con, use_color=True) + console.interact(banner, exitmsg="") finally: con.close() diff --git a/Lib/sqlite3/_completer.py b/Lib/sqlite3/_completer.py new file mode 100644 index 00000000000000..f21ef69cad6439 --- /dev/null +++ b/Lib/sqlite3/_completer.py @@ -0,0 +1,42 @@ +from contextlib import contextmanager + +try: + from _sqlite3 import SQLITE_KEYWORDS +except ImportError: + SQLITE_KEYWORDS = () + +_completion_matches = [] + + +def _complete(text, state): + global _completion_matches + + if state == 0: + text_upper = text.upper() + _completion_matches = [c for c in SQLITE_KEYWORDS if c.startswith(text_upper)] + try: + return _completion_matches[state] + " " + except IndexError: + return None + + +@contextmanager +def completer(): + try: + import readline + except ImportError: + yield + return + + old_completer = readline.get_completer() + try: + readline.set_completer(_complete) + if readline.backend == "editline": + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = "tab: complete" + readline.parse_and_bind(command_string) + yield + finally: + readline.set_completer(old_completer) diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 37e0f74f688659..7f0b0f3650535a 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -1,14 +1,19 @@ """sqlite3 CLI tests.""" import sqlite3 +import sys +import textwrap import unittest from sqlite3.__main__ import main as cli +from test.support.import_helper import import_module from test.support.os_helper import TESTFN, unlink +from test.support.pty_helper import run_pty from test.support import ( captured_stdout, captured_stderr, captured_stdin, force_not_colorized_test_class, + requires_subprocess, ) @@ -200,5 +205,98 @@ def test_color(self): self.assertIn('\x1b[1;35mOperationalError (SQLITE_ERROR)\x1b[0m: ' '\x1b[35mnear "sel": syntax error\x1b[0m', err) + +@requires_subprocess() +@force_not_colorized_test_class +class Completion(unittest.TestCase): + PS1 = "sqlite> " + + @classmethod + def setUpClass(cls): + _sqlite3 = import_module("_sqlite3") + if not hasattr(_sqlite3, "SQLITE_KEYWORDS"): + raise unittest.SkipTest("unable to determine SQLite keywords") + + readline = import_module("readline") + if readline.backend == "editline": + raise unittest.SkipTest("libedit readline is not supported") + + def write_input(self, input_, env=None): + script = textwrap.dedent(""" + import readline + from sqlite3.__main__ import main + + readline.parse_and_bind("set colored-completion-prefix off") + main() + """) + return run_pty(script, input_, env) + + def test_complete_sql_keywords(self): + # List candidates starting with 'S', there should be multiple matches. + input_ = b"S\t\tEL\t 1;\n.quit\n" + output = self.write_input(input_) + self.assertIn(b"SELECT", output) + self.assertIn(b"SET", output) + self.assertIn(b"SAVEPOINT", output) + self.assertIn(b"(1,)", output) + + # Keywords are completed in upper case for even lower case user input. + input_ = b"sel\t\t 1;\n.quit\n" + output = self.write_input(input_) + self.assertIn(b"SELECT", output) + self.assertIn(b"(1,)", output) + + @unittest.skipIf(sys.platform.startswith("freebsd"), + "Two actual tabs are inserted when there are no matching" + " completions in the pseudo-terminal opened by run_pty()" + " on FreeBSD") + def test_complete_no_match(self): + input_ = b"xyzzy\t\t\b\b\b\b\b\b\b.quit\n" + # Set NO_COLOR to disable coloring for self.PS1. + output = self.write_input(input_, env={"NO_COLOR": "1"}) + lines = output.decode().splitlines() + indices = ( + i for i, line in enumerate(lines, 1) + if line.startswith(f"{self.PS1}xyzzy") + ) + line_num = next(indices, -1) + self.assertNotEqual(line_num, -1) + # Completions occupy lines, assert no extra lines when there is nothing + # to complete. + self.assertEqual(line_num, len(lines)) + + def test_complete_no_input(self): + from _sqlite3 import SQLITE_KEYWORDS + + script = textwrap.dedent(""" + import readline + from sqlite3.__main__ import main + + # Configure readline to ...: + # - hide control sequences surrounding each candidate + # - hide "Display all xxx possibilities? (y or n)" + # - hide "--More--" + # - show candidates one per line + readline.parse_and_bind("set colored-completion-prefix off") + readline.parse_and_bind("set colored-stats off") + readline.parse_and_bind("set completion-query-items 0") + readline.parse_and_bind("set page-completions off") + readline.parse_and_bind("set completion-display-width 0") + + main() + """) + input_ = b"\t\t.quit\n" + output = run_pty(script, input_, env={"NO_COLOR": "1"}) + lines = output.decode().splitlines() + indices = [ + i for i, line in enumerate(lines) + if line.startswith(self.PS1) + ] + self.assertEqual(len(indices), 2) + start, end = indices + candidates = [l.strip() for l in lines[start+1:end]] + self.assertEqual(candidates, sorted(SQLITE_KEYWORDS)) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index 2435943f1bb2bd..739af8d9e11a10 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1868,6 +1868,7 @@ Neil Tallim Geoff Talvola Anish Tambe Musashi Tamura +Long Tan William Tanksley Christian Tanzer Steven Taschuk diff --git a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst new file mode 100644 index 00000000000000..38d5c311b1d437 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst @@ -0,0 +1 @@ +Support keyword completion in the :mod:`sqlite3` command-line interface. diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c index 909ddd1f990e19..5464fd1227ad20 100644 --- a/Modules/_sqlite/module.c +++ b/Modules/_sqlite/module.c @@ -32,6 +32,7 @@ #include "microprotocols.h" #include "row.h" #include "blob.h" +#include "util.h" #if SQLITE_VERSION_NUMBER < 3015002 #error "SQLite 3.15.2 or higher required" @@ -404,6 +405,40 @@ pysqlite_error_name(int rc) return NULL; } +static int +add_keyword_tuple(PyObject *module) +{ +#if SQLITE_VERSION_NUMBER >= 3024000 + int count = sqlite3_keyword_count(); + PyObject *keywords = PyTuple_New(count); + if (keywords == NULL) { + return -1; + } + for (int i = 0; i < count; i++) { + const char *keyword; + int size; + int result = sqlite3_keyword_name(i, &keyword, &size); + if (result != SQLITE_OK) { + pysqlite_state *state = pysqlite_get_state(module); + set_error_from_code(state, result); + goto error; + } + PyObject *kwd = PyUnicode_FromStringAndSize(keyword, size); + if (!kwd) { + goto error; + } + PyTuple_SET_ITEM(keywords, i, kwd); + } + return PyModule_Add(module, "SQLITE_KEYWORDS", keywords); + +error: + Py_DECREF(keywords); + return -1; +#else + return 0; +#endif +} + static int add_integer_constants(PyObject *module) { #define ADD_INT(ival) \ @@ -702,6 +737,10 @@ module_exec(PyObject *module) goto error; } + if (add_keyword_tuple(module) < 0) { + goto error; + } + if (PyModule_AddStringConstant(module, "sqlite_version", sqlite3_libversion())) { goto error; } From f00512db20561370faad437853f6ecee0eec4856 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Sat, 7 Jun 2025 00:51:47 +0900 Subject: [PATCH 500/989] Docs: Update `PyExc_*` tables in the c-api documentation (GH-131640) Add `PyExc_BaseExceptionGroup` and `PyExc_EncodingWarning` --- Doc/c-api/exceptions.rst | 12 ++++++++++++ Doc/conf.py | 2 ++ 2 files changed, 14 insertions(+) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index c8e1b5c2461738..885dbeb75303d1 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -982,6 +982,7 @@ the variables: .. index:: single: PyExc_BaseException (C var) + single: PyExc_BaseExceptionGroup (C var) single: PyExc_Exception (C var) single: PyExc_ArithmeticError (C var) single: PyExc_AssertionError (C var) @@ -1041,6 +1042,8 @@ the variables: +=========================================+=================================+==========+ | :c:data:`PyExc_BaseException` | :exc:`BaseException` | [1]_ | +-----------------------------------------+---------------------------------+----------+ +| :c:data:`PyExc_BaseExceptionGroup` | :exc:`BaseExceptionGroup` | [1]_ | ++-----------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_Exception` | :exc:`Exception` | [1]_ | +-----------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | [1]_ | @@ -1164,6 +1167,9 @@ the variables: .. versionadded:: 3.6 :c:data:`PyExc_ModuleNotFoundError`. +.. versionadded:: 3.11 + :c:data:`PyExc_BaseExceptionGroup`. + These are compatibility aliases to :c:data:`PyExc_OSError`: .. index:: @@ -1207,6 +1213,7 @@ the variables: single: PyExc_Warning (C var) single: PyExc_BytesWarning (C var) single: PyExc_DeprecationWarning (C var) + single: PyExc_EncodingWarning (C var) single: PyExc_FutureWarning (C var) single: PyExc_ImportWarning (C var) single: PyExc_PendingDeprecationWarning (C var) @@ -1225,6 +1232,8 @@ the variables: +------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_DeprecationWarning` | :exc:`DeprecationWarning` | | +------------------------------------------+---------------------------------+----------+ +| :c:data:`PyExc_EncodingWarning` | :exc:`EncodingWarning` | | ++------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_FutureWarning` | :exc:`FutureWarning` | | +------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_ImportWarning` | :exc:`ImportWarning` | | @@ -1245,6 +1254,9 @@ the variables: .. versionadded:: 3.2 :c:data:`PyExc_ResourceWarning`. +.. versionadded:: 3.10 + :c:data:`PyExc_EncodingWarning`. + Notes: .. [3] diff --git a/Doc/conf.py b/Doc/conf.py index 7fadad66cb3238..b08f5452901b15 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -234,6 +234,7 @@ ('c:data', 'PyExc_AssertionError'), ('c:data', 'PyExc_AttributeError'), ('c:data', 'PyExc_BaseException'), + ('c:data', 'PyExc_BaseExceptionGroup'), ('c:data', 'PyExc_BlockingIOError'), ('c:data', 'PyExc_BrokenPipeError'), ('c:data', 'PyExc_BufferError'), @@ -287,6 +288,7 @@ # C API: Standard Python warning classes ('c:data', 'PyExc_BytesWarning'), ('c:data', 'PyExc_DeprecationWarning'), + ('c:data', 'PyExc_EncodingWarning'), ('c:data', 'PyExc_FutureWarning'), ('c:data', 'PyExc_ImportWarning'), ('c:data', 'PyExc_PendingDeprecationWarning'), From 46151648ca8ba1edd2a2783e8e154692a13d8ea8 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Sat, 7 Jun 2025 01:44:43 +0200 Subject: [PATCH 501/989] GH-131798: Optimize away type(x) in the JIT when the result is known (GH-135194) --- Lib/test/test_capi/test_opt.py | 25 ++++++++++++++++--- ...-06-05-21-58-30.gh-issue-131798.nt5Ab7.rst | 2 ++ Python/optimizer_bytecodes.c | 7 ++++-- Python/optimizer_cases.c.h | 7 ++++-- 4 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-05-21-58-30.gh-issue-131798.nt5Ab7.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index a292ebcc7f4aed..ee8d261685d463 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1778,11 +1778,12 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_UNICODE", uops) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) - def test_call_type_1(self): + def test_call_type_1_guards_removed(self): def testfunc(n): x = 0 for _ in range(n): - x += type(42) is int + foo = eval('42') + x += type(foo) is int return x res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) @@ -1793,6 +1794,25 @@ def testfunc(n): self.assertNotIn("_GUARD_NOS_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_TYPE_1", uops) + def test_call_type_1_known_type(self): + def testfunc(n): + x = 0 + for _ in range(n): + x += type(42) is int + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # When the result of type(...) is known, _CALL_TYPE_1 is replaced with + # _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW which is optimized away in + # remove_unneeded_uops. + self.assertNotIn("_CALL_TYPE_1", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + def test_call_type_1_result_is_const(self): def testfunc(n): x = 0 @@ -1806,7 +1826,6 @@ def testfunc(n): self.assertEqual(res, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_CALL_TYPE_1", uops) self.assertNotIn("_GUARD_IS_NOT_NONE_POP", uops) def test_call_str_1(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-05-21-58-30.gh-issue-131798.nt5Ab7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-05-21-58-30.gh-issue-131798.nt5Ab7.rst new file mode 100644 index 00000000000000..e4b5f610353f94 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-05-21-58-30.gh-issue-131798.nt5Ab7.rst @@ -0,0 +1,2 @@ +Optimize away ``_CALL_TYPE_1`` in the JIT when the return type is known. +Patch by Tomas Roun diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b4220e2c627ecb..12efaacd8f0dfc 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -937,8 +937,11 @@ dummy_func(void) { } op(_CALL_TYPE_1, (unused, unused, arg -- res)) { - if (sym_has_type(arg)) { - res = sym_new_const(ctx, (PyObject *)sym_get_type(arg)); + PyObject* type = (PyObject *)sym_get_type(arg); + if (type) { + res = sym_new_const(ctx, type); + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + (uintptr_t)type); } else { res = sym_new_not_null(ctx); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 960c683800455e..1a2d49973ee916 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2056,8 +2056,11 @@ JitOptSymbol *arg; JitOptSymbol *res; arg = stack_pointer[-1]; - if (sym_has_type(arg)) { - res = sym_new_const(ctx, (PyObject *)sym_get_type(arg)); + PyObject* type = (PyObject *)sym_get_type(arg); + if (type) { + res = sym_new_const(ctx, type); + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + (uintptr_t)type); } else { res = sym_new_not_null(ctx); From 24069fbca861a5904ee7718469919e84828f22e7 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 7 Jun 2025 10:56:43 +0200 Subject: [PATCH 502/989] Revert "gh-133390: Support SQL keyword completion for sqlite3 CLI (#133393)" temporarily (GH-135232) This reverts commit 62b3d2d443785c4ea5262edb4f9f7040440f9463, which broke buildbots --- Doc/whatsnew/3.15.rst | 7 -- Lib/sqlite3/__main__.py | 11 ++- Lib/sqlite3/_completer.py | 42 -------- Lib/test/test_sqlite3/test_cli.py | 98 ------------------- Misc/ACKS | 1 - ...-05-05-03-14-08.gh-issue-133390.AuTggn.rst | 1 - Modules/_sqlite/module.c | 39 -------- 7 files changed, 6 insertions(+), 193 deletions(-) delete mode 100644 Lib/sqlite3/_completer.py delete mode 100644 Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 2f8335a895c70b..e5bccf17eb63e2 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -134,13 +134,6 @@ shelve (Contributed by Andrea Oliveri in :gh:`134004`.) -sqlite3 -------- - -* Support SQL keyword completion in the :mod:`sqlite3` command-line interface. - (Contributed by Long Tan in :gh:`133393`.) - - ssl --- diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index 9e74b49ee828bc..c2fa23c46cf990 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -12,8 +12,6 @@ from textwrap import dedent from _colorize import get_theme, theme_no_color -from ._completer import completer - def execute(c, sql, suppress_errors=True, theme=theme_no_color): """Helper that wraps execution of SQL code. @@ -138,9 +136,12 @@ def main(*args): execute(con, args.sql, suppress_errors=False, theme=theme) else: # No SQL provided; start the REPL. - with completer(): - console = SqliteInteractiveConsole(con, use_color=True) - console.interact(banner, exitmsg="") + console = SqliteInteractiveConsole(con, use_color=True) + try: + import readline # noqa: F401 + except ImportError: + pass + console.interact(banner, exitmsg="") finally: con.close() diff --git a/Lib/sqlite3/_completer.py b/Lib/sqlite3/_completer.py deleted file mode 100644 index f21ef69cad6439..00000000000000 --- a/Lib/sqlite3/_completer.py +++ /dev/null @@ -1,42 +0,0 @@ -from contextlib import contextmanager - -try: - from _sqlite3 import SQLITE_KEYWORDS -except ImportError: - SQLITE_KEYWORDS = () - -_completion_matches = [] - - -def _complete(text, state): - global _completion_matches - - if state == 0: - text_upper = text.upper() - _completion_matches = [c for c in SQLITE_KEYWORDS if c.startswith(text_upper)] - try: - return _completion_matches[state] + " " - except IndexError: - return None - - -@contextmanager -def completer(): - try: - import readline - except ImportError: - yield - return - - old_completer = readline.get_completer() - try: - readline.set_completer(_complete) - if readline.backend == "editline": - # libedit uses "^I" instead of "tab" - command_string = "bind ^I rl_complete" - else: - command_string = "tab: complete" - readline.parse_and_bind(command_string) - yield - finally: - readline.set_completer(old_completer) diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 7f0b0f3650535a..37e0f74f688659 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -1,19 +1,14 @@ """sqlite3 CLI tests.""" import sqlite3 -import sys -import textwrap import unittest from sqlite3.__main__ import main as cli -from test.support.import_helper import import_module from test.support.os_helper import TESTFN, unlink -from test.support.pty_helper import run_pty from test.support import ( captured_stdout, captured_stderr, captured_stdin, force_not_colorized_test_class, - requires_subprocess, ) @@ -205,98 +200,5 @@ def test_color(self): self.assertIn('\x1b[1;35mOperationalError (SQLITE_ERROR)\x1b[0m: ' '\x1b[35mnear "sel": syntax error\x1b[0m', err) - -@requires_subprocess() -@force_not_colorized_test_class -class Completion(unittest.TestCase): - PS1 = "sqlite> " - - @classmethod - def setUpClass(cls): - _sqlite3 = import_module("_sqlite3") - if not hasattr(_sqlite3, "SQLITE_KEYWORDS"): - raise unittest.SkipTest("unable to determine SQLite keywords") - - readline = import_module("readline") - if readline.backend == "editline": - raise unittest.SkipTest("libedit readline is not supported") - - def write_input(self, input_, env=None): - script = textwrap.dedent(""" - import readline - from sqlite3.__main__ import main - - readline.parse_and_bind("set colored-completion-prefix off") - main() - """) - return run_pty(script, input_, env) - - def test_complete_sql_keywords(self): - # List candidates starting with 'S', there should be multiple matches. - input_ = b"S\t\tEL\t 1;\n.quit\n" - output = self.write_input(input_) - self.assertIn(b"SELECT", output) - self.assertIn(b"SET", output) - self.assertIn(b"SAVEPOINT", output) - self.assertIn(b"(1,)", output) - - # Keywords are completed in upper case for even lower case user input. - input_ = b"sel\t\t 1;\n.quit\n" - output = self.write_input(input_) - self.assertIn(b"SELECT", output) - self.assertIn(b"(1,)", output) - - @unittest.skipIf(sys.platform.startswith("freebsd"), - "Two actual tabs are inserted when there are no matching" - " completions in the pseudo-terminal opened by run_pty()" - " on FreeBSD") - def test_complete_no_match(self): - input_ = b"xyzzy\t\t\b\b\b\b\b\b\b.quit\n" - # Set NO_COLOR to disable coloring for self.PS1. - output = self.write_input(input_, env={"NO_COLOR": "1"}) - lines = output.decode().splitlines() - indices = ( - i for i, line in enumerate(lines, 1) - if line.startswith(f"{self.PS1}xyzzy") - ) - line_num = next(indices, -1) - self.assertNotEqual(line_num, -1) - # Completions occupy lines, assert no extra lines when there is nothing - # to complete. - self.assertEqual(line_num, len(lines)) - - def test_complete_no_input(self): - from _sqlite3 import SQLITE_KEYWORDS - - script = textwrap.dedent(""" - import readline - from sqlite3.__main__ import main - - # Configure readline to ...: - # - hide control sequences surrounding each candidate - # - hide "Display all xxx possibilities? (y or n)" - # - hide "--More--" - # - show candidates one per line - readline.parse_and_bind("set colored-completion-prefix off") - readline.parse_and_bind("set colored-stats off") - readline.parse_and_bind("set completion-query-items 0") - readline.parse_and_bind("set page-completions off") - readline.parse_and_bind("set completion-display-width 0") - - main() - """) - input_ = b"\t\t.quit\n" - output = run_pty(script, input_, env={"NO_COLOR": "1"}) - lines = output.decode().splitlines() - indices = [ - i for i, line in enumerate(lines) - if line.startswith(self.PS1) - ] - self.assertEqual(len(indices), 2) - start, end = indices - candidates = [l.strip() for l in lines[start+1:end]] - self.assertEqual(candidates, sorted(SQLITE_KEYWORDS)) - - if __name__ == "__main__": unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index 739af8d9e11a10..2435943f1bb2bd 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1868,7 +1868,6 @@ Neil Tallim Geoff Talvola Anish Tambe Musashi Tamura -Long Tan William Tanksley Christian Tanzer Steven Taschuk diff --git a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst deleted file mode 100644 index 38d5c311b1d437..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst +++ /dev/null @@ -1 +0,0 @@ -Support keyword completion in the :mod:`sqlite3` command-line interface. diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c index 5464fd1227ad20..909ddd1f990e19 100644 --- a/Modules/_sqlite/module.c +++ b/Modules/_sqlite/module.c @@ -32,7 +32,6 @@ #include "microprotocols.h" #include "row.h" #include "blob.h" -#include "util.h" #if SQLITE_VERSION_NUMBER < 3015002 #error "SQLite 3.15.2 or higher required" @@ -405,40 +404,6 @@ pysqlite_error_name(int rc) return NULL; } -static int -add_keyword_tuple(PyObject *module) -{ -#if SQLITE_VERSION_NUMBER >= 3024000 - int count = sqlite3_keyword_count(); - PyObject *keywords = PyTuple_New(count); - if (keywords == NULL) { - return -1; - } - for (int i = 0; i < count; i++) { - const char *keyword; - int size; - int result = sqlite3_keyword_name(i, &keyword, &size); - if (result != SQLITE_OK) { - pysqlite_state *state = pysqlite_get_state(module); - set_error_from_code(state, result); - goto error; - } - PyObject *kwd = PyUnicode_FromStringAndSize(keyword, size); - if (!kwd) { - goto error; - } - PyTuple_SET_ITEM(keywords, i, kwd); - } - return PyModule_Add(module, "SQLITE_KEYWORDS", keywords); - -error: - Py_DECREF(keywords); - return -1; -#else - return 0; -#endif -} - static int add_integer_constants(PyObject *module) { #define ADD_INT(ival) \ @@ -737,10 +702,6 @@ module_exec(PyObject *module) goto error; } - if (add_keyword_tuple(module) < 0) { - goto error; - } - if (PyModule_AddStringConstant(module, "sqlite_version", sqlite3_libversion())) { goto error; } From ac9c3431cc5916a795c42b3e2b965233ceffe6f0 Mon Sep 17 00:00:00 2001 From: Daniel Golding Date: Sat, 7 Jun 2025 20:32:06 +0200 Subject: [PATCH 503/989] gh-134876: Add fallback for when process_vm_readv fails with ENOSYS (#134878) --- Misc/ACKS | 1 + ...-05-31-10-26-46.gh-issue-134876.8mBGJI.rst | 2 + Python/remote_debug.h | 70 +++++++++++++++++++ Python/remote_debugging.c | 39 +++++++++++ 4 files changed, 112 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-10-26-46.gh-issue-134876.8mBGJI.rst diff --git a/Misc/ACKS b/Misc/ACKS index 2435943f1bb2bd..0be31560387ccb 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -658,6 +658,7 @@ Michael Goderbauer Karan Goel Jeroen Van Goey Christoph Gohlke +Daniel Golding Tim Golden Yonatan Goldschmidt Mark Gollahon diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-10-26-46.gh-issue-134876.8mBGJI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-10-26-46.gh-issue-134876.8mBGJI.rst new file mode 100644 index 00000000000000..1da76561469a41 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-10-26-46.gh-issue-134876.8mBGJI.rst @@ -0,0 +1,2 @@ +Add support to :pep:`768` remote debugging for Linux kernels which don't +have CONFIG_CROSS_MEMORY_ATTACH configured. diff --git a/Python/remote_debug.h b/Python/remote_debug.h index 6cbf1c8deaaed9..0a817bdbd488e0 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -116,6 +116,8 @@ typedef struct { mach_port_t task; #elif defined(MS_WINDOWS) HANDLE hProcess; +#elif defined(__linux__) + int memfd; #endif page_cache_entry_t pages[MAX_PAGES]; Py_ssize_t page_size; @@ -162,6 +164,8 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize Windows process handle"); return -1; } +#elif defined(__linux__) + handle->memfd = -1; #endif handle->page_size = get_page_size(); for (int i = 0; i < MAX_PAGES; i++) { @@ -179,6 +183,11 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { CloseHandle(handle->hProcess); handle->hProcess = NULL; } +#elif defined(__linux__) + if (handle->memfd != -1) { + close(handle->memfd); + handle->memfd = -1; + } #endif handle->pid = 0; _Py_RemoteDebug_FreePageCache(handle); @@ -907,6 +916,61 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) return address; } +#if defined(__linux__) && HAVE_PROCESS_VM_READV + +static int +open_proc_mem_fd(proc_handle_t *handle) +{ + char mem_file_path[64]; + sprintf(mem_file_path, "/proc/%d/mem", handle->pid); + + handle->memfd = open(mem_file_path, O_RDWR); + if (handle->memfd == -1) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "failed to open file %s: %s", mem_file_path, strerror(errno)); + return -1; + } + return 0; +} + +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. +static int +read_remote_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t read_bytes = 0; + + do { + local[0].iov_base = (char*)dst + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + read_bytes = preadv(handle->memfd, local, 1, offset); + if (read_bytes < 0) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "preadv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); + return -1; + } + + result += read_bytes; + } while ((size_t)read_bytes != local[0].iov_len); + return 0; +} + +#endif // __linux__ + // Platform-independent memory read function static int _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) @@ -928,6 +992,9 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -941,6 +1008,9 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); if (read_bytes < 0) { + if (errno == ENOSYS) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } PyErr_SetFromErrno(PyExc_OSError); _set_debug_exception_cause(PyExc_OSError, "process_vm_readv failed for PID %d at address 0x%lx " diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index dd55b7812d4dee..7aee87ef05a407 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -24,6 +24,39 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds return _Py_RemoteDebug_ReadRemoteMemory(handle, remote_address, len, dst); } +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. +#if defined(__linux__) && HAVE_PROCESS_VM_READV +static int +write_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t written = 0; + + do { + local[0].iov_base = (char*)src + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + written = pwritev(handle->memfd, local, 1, offset); + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + return 0; +} +#endif // __linux__ + static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { @@ -39,6 +72,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return write_memory_fallback(handle, remote_address, len, src); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -52,6 +88,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); if (written < 0) { + if (errno == ENOSYS) { + return write_memory_fallback(handle, remote_address, len, src); + } PyErr_SetFromErrno(PyExc_OSError); return -1; } From 8fdbbf8b18f1405abe677d0e04874c1c86ccdb4a Mon Sep 17 00:00:00 2001 From: Amit Lavon Date: Sat, 7 Jun 2025 14:08:44 -0700 Subject: [PATCH 504/989] GH-131798: Type-propagate string/list/tuple slices (GH-134671) --- Lib/test/test_capi/test_opt.py | 30 +++++++++++++++++-- ...-05-25-19-32-15.gh-issue-131798.f5h8aI.rst | 1 + Python/optimizer_bytecodes.c | 14 +++++++++ Python/optimizer_cases.c.h | 13 +++++++- 4 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-25-19-32-15.gh-issue-131798.f5h8aI.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index ee8d261685d463..8a3819dabe44ce 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1666,13 +1666,11 @@ def testfunc(n): self.assertIn("_CONTAINS_OP_DICT", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) - def test_remove_guard_for_known_type_str(self): def f(n): for i in range(n): false = i == TIER2_THRESHOLD empty = "X"[:false] - empty += "" # Make JIT realize this is a string. if empty: return 1 return 0 @@ -2249,6 +2247,34 @@ def f(n): self.assertNotIn("_LOAD_ATTR_METHOD_NO_DICT", uops) self.assertNotIn("_LOAD_ATTR_METHOD_LAZY_DICT", uops) + def test_remove_guard_for_slice_list(self): + def f(n): + for i in range(n): + false = i == TIER2_THRESHOLD + sliced = [1, 2, 3][:false] + if sliced: + return 1 + return 0 + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, 0) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_TO_BOOL_LIST", uops) + self.assertNotIn("_GUARD_TOS_LIST", uops) + + def test_remove_guard_for_slice_tuple(self): + def f(n): + for i in range(n): + false = i == TIER2_THRESHOLD + a, b = (1, 2, 3)[: false + 2] + + _, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops) + self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-25-19-32-15.gh-issue-131798.f5h8aI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-25-19-32-15.gh-issue-131798.f5h8aI.rst new file mode 100644 index 00000000000000..6ecbfb8d9cf7df --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-25-19-32-15.gh-issue-131798.f5h8aI.rst @@ -0,0 +1 @@ +Make the JIT optimizer understand that slicing a string/list/tuple returns the same type. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 12efaacd8f0dfc..fbf4dfd3db629c 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1237,6 +1237,20 @@ dummy_func(void) { sym_set_const(callable, list_append); } + op(_BINARY_SLICE, (container, start, stop -- res)) { + // Slicing a string/list/tuple always returns the same type. + PyTypeObject *type = sym_get_type(container); + if (type == &PyUnicode_Type || + type == &PyList_Type || + type == &PyTuple_Type) + { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } + } + // END BYTECODES // } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1a2d49973ee916..b42f47c75eaf50 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -568,8 +568,19 @@ } case _BINARY_SLICE: { + JitOptSymbol *container; JitOptSymbol *res; - res = sym_new_not_null(ctx); + container = stack_pointer[-3]; + PyTypeObject *type = sym_get_type(container); + if (type == &PyUnicode_Type || + type == &PyList_Type || + type == &PyTuple_Type) + { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); From d610f11d21241d353b25843f66e51098a5c0ddad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:10:52 +0200 Subject: [PATCH 505/989] gh-133579: correctly report C curses errors in `_curses_panel` (#134629) This is a follow-up to ee36db550076e5a9185444ffbc53eaf8157ef04c. --- Modules/_curses_panel.c | 369 ++++++++++++++++++++----------- Modules/clinic/_curses_panel.c.h | 159 ++++--------- 2 files changed, 278 insertions(+), 250 deletions(-) diff --git a/Modules/_curses_panel.c b/Modules/_curses_panel.c index eecf7a1c8a1e56..d7acfc6a06a974 100644 --- a/Modules/_curses_panel.c +++ b/Modules/_curses_panel.c @@ -17,6 +17,7 @@ static const char PyCursesVersion[] = "2.1"; #include "Python.h" +#define CURSES_PANEL_MODULE #include "py_curses.h" #if defined(HAVE_NCURSESW_PANEL_H) @@ -28,10 +29,12 @@ static const char PyCursesVersion[] = "2.1"; #endif typedef struct { - PyObject *PyCursesError; + PyObject *error; PyTypeObject *PyCursesPanel_Type; } _curses_panel_state; +typedef struct PyCursesPanelObject PyCursesPanelObject; + static inline _curses_panel_state * get_curses_panel_state(PyObject *module) { @@ -40,11 +43,30 @@ get_curses_panel_state(PyObject *module) return (_curses_panel_state *)state; } +static inline _curses_panel_state * +get_curses_panel_state_by_panel(PyCursesPanelObject *panel) +{ + /* + * Note: 'state' may be NULL if Py_TYPE(panel) is not a heap + * type associated with this module, but the compiler would + * have likely already complained with an "invalid pointer + * type" at compile-time. + * + * To make it more robust, all functions recovering a module's + * state from an object should expect to return NULL with an + * exception set (in contrast to functions recovering a module's + * state from a module itself). + */ + void *state = PyType_GetModuleState(Py_TYPE(panel)); + assert(state != NULL); + return (_curses_panel_state *)state; +} + static int _curses_panel_clear(PyObject *mod) { _curses_panel_state *state = get_curses_panel_state(mod); - Py_CLEAR(state->PyCursesError); + Py_CLEAR(state->error); Py_CLEAR(state->PyCursesPanel_Type); return 0; } @@ -54,7 +76,7 @@ _curses_panel_traverse(PyObject *mod, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(mod)); _curses_panel_state *state = get_curses_panel_state(mod); - Py_VISIT(state->PyCursesError); + Py_VISIT(state->error); Py_VISIT(state->PyCursesPanel_Type); return 0; } @@ -65,28 +87,149 @@ _curses_panel_free(void *mod) (void)_curses_panel_clear((PyObject *)mod); } +/* Utility Error Procedures + * + * The naming and implementations are identical to those in _cursesmodule.c. + * Functions that are not yet needed (for instance, reporting an ERR value + * from a module-wide function, namely curses_panel_set_error()) are + * omitted and should only be added if needed. + */ + +static void +_curses_panel_format_error(_curses_panel_state *state, + const char *curses_funcname, + const char *python_funcname, + const char *return_value, + const char *default_message) +{ + assert(!PyErr_Occurred()); + if (python_funcname == NULL && curses_funcname == NULL) { + PyErr_SetString(state->error, default_message); + } + else if (python_funcname == NULL) { + (void)PyErr_Format(state->error, CURSES_ERROR_FORMAT, + curses_funcname, return_value); + } + else { + assert(python_funcname != NULL); + (void)PyErr_Format(state->error, CURSES_ERROR_VERBOSE_FORMAT, + curses_funcname, python_funcname, return_value); + } +} + +/* + * Format a curses error for a function that returned ERR. + * + * Specify a non-NULL 'python_funcname' when the latter differs from + * 'curses_funcname'. If both names are NULL, uses the 'catchall_ERR' + * message instead. + */ +static void +_curses_panel_set_error(_curses_panel_state *state, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_panel_format_error(state, curses_funcname, python_funcname, + "ERR", catchall_ERR); +} + +/* + * Format a curses error for a function that returned NULL. + * + * Specify a non-NULL 'python_funcname' when the latter differs from + * 'curses_funcname'. If both names are NULL, uses the 'catchall_NULL' + * message instead. + */ +static void +_curses_panel_set_null_error(_curses_panel_state *state, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_panel_format_error(state, curses_funcname, python_funcname, + "NULL", catchall_NULL); +} + +/* Same as _curses_panel_set_null_error() for a module object. */ +static void +curses_panel_set_null_error(PyObject *module, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_panel_state *state = get_curses_panel_state(module); + _curses_panel_set_null_error(state, curses_funcname, python_funcname); +} + +/* Same as _curses_panel_set_error() for a panel object. */ +static void +curses_panel_panel_set_error(PyCursesPanelObject *panel, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_panel_state *state = get_curses_panel_state_by_panel(panel); + _curses_panel_set_error(state, curses_funcname, python_funcname); +} + +/* Same as _curses_panel_set_null_error() for a panel object. */ +static void +curses_panel_panel_set_null_error(PyCursesPanelObject *panel, + const char *curses_funcname, + const char *python_funcname) +{ + _curses_panel_state *state = get_curses_panel_state_by_panel(panel); + _curses_panel_set_null_error(state, curses_funcname, python_funcname); +} + +/* + * Indicate that a panel object couldn't be found. + * + * Use it for the following constructions: + * + * PROC caller_funcname: + * pan = called_funcname() + * find_po(panel) + * + * PROC caller_funcname: + * find_po(self->pan) +*/ +static void +curses_panel_notfound_error(const char *called_funcname, + const char *caller_funcname) +{ + assert(!(called_funcname == NULL && caller_funcname == NULL)); + if (caller_funcname == NULL) { + (void)PyErr_Format(PyExc_RuntimeError, + "%s(): cannot find panel object", + called_funcname); + } + else { + (void)PyErr_Format(PyExc_RuntimeError, + "%s() (called by %s()): cannot find panel object", + called_funcname, caller_funcname); + } +} + /* Utility Functions */ /* - * Check the return code from a curses function and return None - * or raise an exception as appropriate. + * Check the return code from a curses function, returning None + * on success and setting an exception on error. */ +/* + * Return None if 'code' is different from ERR (implementation-defined). + * Otherwise, set an exception using curses_panel_panel_set_error() and + * the remaining arguments, and return NULL. + */ static PyObject * -PyCursesCheckERR(_curses_panel_state *state, int code, const char *fname) +curses_panel_panel_check_err(PyCursesPanelObject *panel, int code, + const char *curses_funcname, + const char *python_funcname) { if (code != ERR) { Py_RETURN_NONE; } - else { - if (fname == NULL) { - PyErr_SetString(state->PyCursesError, catchall_ERR); - } - else { - PyErr_Format(state->PyCursesError, "%s() returned ERR", fname); - } - return NULL; - } + curses_panel_panel_set_error(panel, curses_funcname, python_funcname); + return NULL; } /***************************************************************************** @@ -95,7 +238,7 @@ PyCursesCheckERR(_curses_panel_state *state, int code, const char *fname) /* Definition of the panel object and panel type */ -typedef struct { +typedef struct PyCursesPanelObject { PyObject_HEAD PANEL *pan; PyCursesWindowObject *wo; /* for reference counts */ @@ -144,8 +287,11 @@ insert_lop(PyCursesPanelObject *po) return 0; } -/* Remove the panel object from lop */ -static void +/* Remove the panel object from lop. + * + * Return -1 on error but do NOT set an exception; otherwise return 0. + */ +static int remove_lop(PyCursesPanelObject *po) { list_of_panels *temp, *n; @@ -154,25 +300,23 @@ remove_lop(PyCursesPanelObject *po) if (temp->po == po) { lop = temp->next; PyMem_Free(temp); - return; + return 0; } while (temp->next == NULL || temp->next->po != po) { if (temp->next == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "remove_lop: can't find Panel Object"); - return; + return -1; } temp = temp->next; } n = temp->next->next; PyMem_Free(temp->next); temp->next = n; - return; + return 0; } /* Return the panel object that corresponds to pan */ static PyCursesPanelObject * -find_po(PANEL *pan) +find_po_impl(PANEL *pan) { list_of_panels *temp; for (temp = lop; temp->po->pan != pan; temp = temp->next) @@ -180,6 +324,17 @@ find_po(PANEL *pan) return temp->po; } +/* Same as find_po_impl() but with caller context information. */ +static PyCursesPanelObject * +find_po(PANEL *pan, const char *called_funcname, const char *caller_funcname) +{ + PyCursesPanelObject *res = find_po_impl(pan); + if (res == NULL) { + curses_panel_notfound_error(called_funcname, caller_funcname); + } + return res; +} + /*[clinic input] module _curses_panel class _curses_panel.panel "PyCursesPanelObject *" "&PyCursesPanel_Type" @@ -193,67 +348,59 @@ class _curses_panel.panel "PyCursesPanelObject *" "&PyCursesPanel_Type" /*[clinic input] _curses_panel.panel.bottom - cls: defining_class - Push the panel to the bottom of the stack. [clinic start generated code]*/ static PyObject * -_curses_panel_panel_bottom_impl(PyCursesPanelObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=8ec7fbbc08554021 input=6b7d2c0578b5a1c4]*/ +_curses_panel_panel_bottom_impl(PyCursesPanelObject *self) +/*[clinic end generated code: output=7aa7d14d7e1d1ce6 input=b6c920c071b61e2e]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, bottom_panel(self->pan), "bottom"); + int rtn = bottom_panel(self->pan); + return curses_panel_panel_check_err(self, rtn, "bottom_panel", "bottom"); } /*[clinic input] _curses_panel.panel.hide - cls: defining_class - Hide the panel. This does not delete the object, it just makes the window on screen invisible. [clinic start generated code]*/ static PyObject * -_curses_panel_panel_hide_impl(PyCursesPanelObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=cc6ab7203cdc1450 input=1bfc741f473e6055]*/ +_curses_panel_panel_hide_impl(PyCursesPanelObject *self) +/*[clinic end generated code: output=a7bbbd523e1eab49 input=f6ab884e99386118]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, hide_panel(self->pan), "hide"); + int rtn = hide_panel(self->pan); + return curses_panel_panel_check_err(self, rtn, "hide_panel", "hide"); } /*[clinic input] _curses_panel.panel.show - cls: defining_class - Display the panel (which might have been hidden). [clinic start generated code]*/ static PyObject * -_curses_panel_panel_show_impl(PyCursesPanelObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=dc3421de375f0409 input=8122e80151cb4379]*/ +_curses_panel_panel_show_impl(PyCursesPanelObject *self) +/*[clinic end generated code: output=6b4553ab45c97769 input=57b167bbefaa3755]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, show_panel(self->pan), "show"); + int rtn = show_panel(self->pan); + return curses_panel_panel_check_err(self, rtn, "show_panel", "show"); } /*[clinic input] _curses_panel.panel.top - cls: defining_class - Push panel to the top of the stack. [clinic start generated code]*/ static PyObject * -_curses_panel_panel_top_impl(PyCursesPanelObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=10a072e511e873f7 input=1f372d597dda3379]*/ +_curses_panel_panel_top_impl(PyCursesPanelObject *self) +/*[clinic end generated code: output=0f5f2f8cdd2d1777 input=be33975ec3ca0e9a]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, top_panel(self->pan), "top"); + int rtn = top_panel(self->pan); + return curses_panel_panel_check_err(self, rtn, "top_panel", "top"); } /* Allocation and deallocation of Panel Objects */ @@ -287,13 +434,22 @@ PyCursesPanel_Dealloc(PyObject *self) tp = (PyObject *) Py_TYPE(po); obj = (PyObject *) panel_userptr(po->pan); if (obj) { - (void)set_panel_userptr(po->pan, NULL); Py_DECREF(obj); + if (set_panel_userptr(po->pan, NULL) == ERR) { + curses_panel_panel_set_error(po, "set_panel_userptr", "__del__"); + PyErr_FormatUnraisable("Exception ignored in PyCursesPanel_Dealloc()"); + } + } + if (del_panel(po->pan) == ERR && !PyErr_Occurred()) { + curses_panel_panel_set_error(po, "del_panel", "__del__"); + PyErr_FormatUnraisable("Exception ignored in PyCursesPanel_Dealloc()"); } - (void)del_panel(po->pan); if (po->wo != NULL) { Py_DECREF(po->wo); - remove_lop(po); + if (remove_lop(po) < 0) { + PyErr_SetString(PyExc_RuntimeError, "__del__: no panel object to delete"); + PyErr_FormatUnraisable("Exception ignored in PyCursesPanel_Dealloc()"); + } } PyObject_Free(po); Py_DECREF(tp); @@ -315,18 +471,11 @@ _curses_panel_panel_above_impl(PyCursesPanelObject *self) PyCursesPanelObject *po; pan = panel_above(self->pan); - - if (pan == NULL) { /* valid output, it means the calling panel - is on top of the stack */ + if (pan == NULL) { /* valid output: it means no panel exists yet */ Py_RETURN_NONE; } - po = find_po(pan); - if (po == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "panel_above: can't find Panel Object"); - return NULL; - } - return Py_NewRef(po); + po = find_po(pan, "panel_above", "above"); + return Py_XNewRef(po); } /* panel_below(NULL) returns the top panel in the stack. To get @@ -345,18 +494,11 @@ _curses_panel_panel_below_impl(PyCursesPanelObject *self) PyCursesPanelObject *po; pan = panel_below(self->pan); - - if (pan == NULL) { /* valid output, it means the calling panel - is on the bottom of the stack */ + if (pan == NULL) { /* valid output: it means no panel exists yet */ Py_RETURN_NONE; } - po = find_po(pan); - if (po == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "panel_below: can't find Panel Object"); - return NULL; - } - return Py_NewRef(po); + po = find_po(pan, "panel_below", "below"); + return Py_XNewRef(po); } /*[clinic input] @@ -378,7 +520,6 @@ _curses_panel_panel_hidden_impl(PyCursesPanelObject *self) /*[clinic input] _curses_panel.panel.move - cls: defining_class y: int x: int / @@ -387,12 +528,11 @@ Move the panel to the screen coordinates (y, x). [clinic start generated code]*/ static PyObject * -_curses_panel_panel_move_impl(PyCursesPanelObject *self, PyTypeObject *cls, - int y, int x) -/*[clinic end generated code: output=ce546c93e56867da input=60a0e7912ff99849]*/ +_curses_panel_panel_move_impl(PyCursesPanelObject *self, int y, int x) +/*[clinic end generated code: output=d867535a89777415 input=e0b36b78acc03fba]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, move_panel(self->pan, y, x), "move_panel"); + int rtn = move_panel(self->pan, y, x); + return curses_panel_panel_check_err(self, rtn, "move_panel", "move"); } /*[clinic input] @@ -411,7 +551,6 @@ _curses_panel_panel_window_impl(PyCursesPanelObject *self) /*[clinic input] _curses_panel.panel.replace - cls: defining_class win: object(type="PyCursesWindowObject *", subclass_of="&PyCursesWindow_Type") / @@ -420,22 +559,17 @@ Change the window associated with the panel to the window win. static PyObject * _curses_panel_panel_replace_impl(PyCursesPanelObject *self, - PyTypeObject *cls, PyCursesWindowObject *win) -/*[clinic end generated code: output=c71f95c212d58ae7 input=dbec7180ece41ff5]*/ +/*[clinic end generated code: output=2253a95f7b287255 input=4b1c4283987d9dfa]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - - PyCursesPanelObject *po = find_po(self->pan); + PyCursesPanelObject *po = find_po(self->pan, "replace", NULL); if (po == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "replace_panel: can't find Panel Object"); return NULL; } int rtn = replace_panel(self->pan, win->win); if (rtn == ERR) { - PyErr_SetString(state->PyCursesError, "replace_panel() returned ERR"); + curses_panel_panel_set_error(self, "replace_panel", "replace"); return NULL; } Py_SETREF(po->wo, (PyCursesWindowObject*)Py_NewRef(win)); @@ -445,7 +579,6 @@ _curses_panel_panel_replace_impl(PyCursesPanelObject *self, /*[clinic input] _curses_panel.panel.set_userptr - cls: defining_class obj: object / @@ -454,8 +587,8 @@ Set the panel's user pointer to obj. static PyObject * _curses_panel_panel_set_userptr_impl(PyCursesPanelObject *self, - PyTypeObject *cls, PyObject *obj) -/*[clinic end generated code: output=db74f3db07b28080 input=e3fee2ff7b1b8e48]*/ + PyObject *obj) +/*[clinic end generated code: output=7fa1fd23f69db71e input=d2c6a9dbefabbf39]*/ { PyCursesInitialised; Py_INCREF(obj); @@ -464,34 +597,27 @@ _curses_panel_panel_set_userptr_impl(PyCursesPanelObject *self, if (rc == ERR) { /* In case of an ncurses error, decref the new object again */ Py_DECREF(obj); + curses_panel_panel_set_error(self, "set_panel_userptr", "set_userptr"); + return NULL; } - else { - Py_XDECREF(oldobj); - } - - _curses_panel_state *state = PyType_GetModuleState(cls); - return PyCursesCheckERR(state, rc, "set_panel_userptr"); + Py_XDECREF(oldobj); + Py_RETURN_NONE; } /*[clinic input] _curses_panel.panel.userptr - cls: defining_class - Return the user pointer for the panel. [clinic start generated code]*/ static PyObject * -_curses_panel_panel_userptr_impl(PyCursesPanelObject *self, - PyTypeObject *cls) -/*[clinic end generated code: output=eea6e6f39ffc0179 input=f22ca4f115e30a80]*/ +_curses_panel_panel_userptr_impl(PyCursesPanelObject *self) +/*[clinic end generated code: output=e849c307b5dc9237 input=f78b7a47aef0fd50]*/ { - _curses_panel_state *state = PyType_GetModuleState(cls); - PyCursesInitialised; PyObject *obj = (PyObject *) panel_userptr(self->pan); if (obj == NULL) { - PyErr_SetString(state->PyCursesError, "no userptr set"); + curses_panel_panel_set_null_error(self, "panel_userptr", "userptr"); return NULL; } @@ -552,18 +678,11 @@ _curses_panel_bottom_panel_impl(PyObject *module) PyCursesInitialised; pan = panel_above(NULL); - - if (pan == NULL) { /* valid output, it means - there's no panel at all */ + if (pan == NULL) { /* valid output: it means no panel exists yet */ Py_RETURN_NONE; } - po = find_po(pan); - if (po == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "panel_above: can't find Panel Object"); - return NULL; - } - return Py_NewRef(po); + po = find_po(pan, "panel_above", "bottom_panel"); + return Py_XNewRef(po); } /*[clinic input] @@ -579,14 +698,13 @@ static PyObject * _curses_panel_new_panel_impl(PyObject *module, PyCursesWindowObject *win) /*[clinic end generated code: output=45e948e0176a9bd2 input=74d4754e0ebe4800]*/ { - _curses_panel_state *state = get_curses_panel_state(module); - PANEL *pan = new_panel(win->win); if (pan == NULL) { - PyErr_SetString(state->PyCursesError, catchall_NULL); + curses_panel_set_null_error(module, "new_panel", NULL); return NULL; } - return (PyObject *)PyCursesPanel_New(state, pan, win); + _curses_panel_state *state = get_curses_panel_state(module); + return PyCursesPanel_New(state, pan, win); } @@ -610,18 +728,11 @@ _curses_panel_top_panel_impl(PyObject *module) PyCursesInitialised; pan = panel_below(NULL); - - if (pan == NULL) { /* valid output, it means - there's no panel at all */ + if (pan == NULL) { /* valid output: it means no panel exists yet */ Py_RETURN_NONE; } - po = find_po(pan); - if (po == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "panel_below: can't find Panel Object"); - return NULL; - } - return Py_NewRef(po); + po = find_po(pan, "panel_below", "top_panel"); + return Py_XNewRef(po); } /*[clinic input] @@ -673,10 +784,10 @@ _curses_panel_exec(PyObject *mod) } /* For exception _curses_panel.error */ - state->PyCursesError = PyErr_NewException( + state->error = PyErr_NewException( "_curses_panel.error", NULL, NULL); - if (PyModule_AddObjectRef(mod, "error", state->PyCursesError) < 0) { + if (PyModule_AddObjectRef(mod, "error", state->error) < 0) { return -1; } diff --git a/Modules/clinic/_curses_panel.c.h b/Modules/clinic/_curses_panel.c.h index 6f4966825ec4bf..75cf067c8aa822 100644 --- a/Modules/clinic/_curses_panel.c.h +++ b/Modules/clinic/_curses_panel.c.h @@ -2,10 +2,7 @@ preserve [clinic start generated code]*/ -#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) -# include "pycore_runtime.h" // _Py_SINGLETON() -#endif -#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_curses_panel_panel_bottom__doc__, "bottom($self, /)\n" @@ -14,19 +11,15 @@ PyDoc_STRVAR(_curses_panel_panel_bottom__doc__, "Push the panel to the bottom of the stack."); #define _CURSES_PANEL_PANEL_BOTTOM_METHODDEF \ - {"bottom", _PyCFunction_CAST(_curses_panel_panel_bottom), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_bottom__doc__}, + {"bottom", (PyCFunction)_curses_panel_panel_bottom, METH_NOARGS, _curses_panel_panel_bottom__doc__}, static PyObject * -_curses_panel_panel_bottom_impl(PyCursesPanelObject *self, PyTypeObject *cls); +_curses_panel_panel_bottom_impl(PyCursesPanelObject *self); static PyObject * -_curses_panel_panel_bottom(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_bottom(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "bottom() takes no arguments"); - return NULL; - } - return _curses_panel_panel_bottom_impl((PyCursesPanelObject *)self, cls); + return _curses_panel_panel_bottom_impl((PyCursesPanelObject *)self); } PyDoc_STRVAR(_curses_panel_panel_hide__doc__, @@ -38,19 +31,15 @@ PyDoc_STRVAR(_curses_panel_panel_hide__doc__, "This does not delete the object, it just makes the window on screen invisible."); #define _CURSES_PANEL_PANEL_HIDE_METHODDEF \ - {"hide", _PyCFunction_CAST(_curses_panel_panel_hide), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_hide__doc__}, + {"hide", (PyCFunction)_curses_panel_panel_hide, METH_NOARGS, _curses_panel_panel_hide__doc__}, static PyObject * -_curses_panel_panel_hide_impl(PyCursesPanelObject *self, PyTypeObject *cls); +_curses_panel_panel_hide_impl(PyCursesPanelObject *self); static PyObject * -_curses_panel_panel_hide(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_hide(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "hide() takes no arguments"); - return NULL; - } - return _curses_panel_panel_hide_impl((PyCursesPanelObject *)self, cls); + return _curses_panel_panel_hide_impl((PyCursesPanelObject *)self); } PyDoc_STRVAR(_curses_panel_panel_show__doc__, @@ -60,19 +49,15 @@ PyDoc_STRVAR(_curses_panel_panel_show__doc__, "Display the panel (which might have been hidden)."); #define _CURSES_PANEL_PANEL_SHOW_METHODDEF \ - {"show", _PyCFunction_CAST(_curses_panel_panel_show), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_show__doc__}, + {"show", (PyCFunction)_curses_panel_panel_show, METH_NOARGS, _curses_panel_panel_show__doc__}, static PyObject * -_curses_panel_panel_show_impl(PyCursesPanelObject *self, PyTypeObject *cls); +_curses_panel_panel_show_impl(PyCursesPanelObject *self); static PyObject * -_curses_panel_panel_show(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_show(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "show() takes no arguments"); - return NULL; - } - return _curses_panel_panel_show_impl((PyCursesPanelObject *)self, cls); + return _curses_panel_panel_show_impl((PyCursesPanelObject *)self); } PyDoc_STRVAR(_curses_panel_panel_top__doc__, @@ -82,19 +67,15 @@ PyDoc_STRVAR(_curses_panel_panel_top__doc__, "Push panel to the top of the stack."); #define _CURSES_PANEL_PANEL_TOP_METHODDEF \ - {"top", _PyCFunction_CAST(_curses_panel_panel_top), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_top__doc__}, + {"top", (PyCFunction)_curses_panel_panel_top, METH_NOARGS, _curses_panel_panel_top__doc__}, static PyObject * -_curses_panel_panel_top_impl(PyCursesPanelObject *self, PyTypeObject *cls); +_curses_panel_panel_top_impl(PyCursesPanelObject *self); static PyObject * -_curses_panel_panel_top(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_top(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "top() takes no arguments"); - return NULL; - } - return _curses_panel_panel_top_impl((PyCursesPanelObject *)self, cls); + return _curses_panel_panel_top_impl((PyCursesPanelObject *)self); } PyDoc_STRVAR(_curses_panel_panel_above__doc__, @@ -158,36 +139,19 @@ PyDoc_STRVAR(_curses_panel_panel_move__doc__, "Move the panel to the screen coordinates (y, x)."); #define _CURSES_PANEL_PANEL_MOVE_METHODDEF \ - {"move", _PyCFunction_CAST(_curses_panel_panel_move), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_move__doc__}, + {"move", _PyCFunction_CAST(_curses_panel_panel_move), METH_FASTCALL, _curses_panel_panel_move__doc__}, static PyObject * -_curses_panel_panel_move_impl(PyCursesPanelObject *self, PyTypeObject *cls, - int y, int x); +_curses_panel_panel_move_impl(PyCursesPanelObject *self, int y, int x); static PyObject * -_curses_panel_panel_move(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_move(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) - #else - # define KWTUPLE NULL - #endif - - static const char * const _keywords[] = {"", "", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "move", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[2]; int y; int x; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { + if (!_PyArg_CheckPositional("move", nargs, 2, 2)) { goto exit; } y = PyLong_AsInt(args[0]); @@ -198,7 +162,7 @@ _curses_panel_panel_move(PyObject *self, PyTypeObject *cls, PyObject *const *arg if (x == -1 && PyErr_Occurred()) { goto exit; } - return_value = _curses_panel_panel_move_impl((PyCursesPanelObject *)self, cls, y, x); + return_value = _curses_panel_panel_move_impl((PyCursesPanelObject *)self, y, x); exit: return return_value; @@ -229,44 +193,24 @@ PyDoc_STRVAR(_curses_panel_panel_replace__doc__, "Change the window associated with the panel to the window win."); #define _CURSES_PANEL_PANEL_REPLACE_METHODDEF \ - {"replace", _PyCFunction_CAST(_curses_panel_panel_replace), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_replace__doc__}, + {"replace", (PyCFunction)_curses_panel_panel_replace, METH_O, _curses_panel_panel_replace__doc__}, static PyObject * _curses_panel_panel_replace_impl(PyCursesPanelObject *self, - PyTypeObject *cls, PyCursesWindowObject *win); static PyObject * -_curses_panel_panel_replace(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_replace(PyObject *self, PyObject *arg) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) - #else - # define KWTUPLE NULL - #endif - - static const char * const _keywords[] = {"", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "replace", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; PyCursesWindowObject *win; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - if (!PyObject_TypeCheck(args[0], &PyCursesWindow_Type)) { - _PyArg_BadArgument("replace", "argument 1", (&PyCursesWindow_Type)->tp_name, args[0]); + if (!PyObject_TypeCheck(arg, &PyCursesWindow_Type)) { + _PyArg_BadArgument("replace", "argument", (&PyCursesWindow_Type)->tp_name, arg); goto exit; } - win = (PyCursesWindowObject *)args[0]; - return_value = _curses_panel_panel_replace_impl((PyCursesPanelObject *)self, cls, win); + win = (PyCursesWindowObject *)arg; + return_value = _curses_panel_panel_replace_impl((PyCursesPanelObject *)self, win); exit: return return_value; @@ -279,41 +223,19 @@ PyDoc_STRVAR(_curses_panel_panel_set_userptr__doc__, "Set the panel\'s user pointer to obj."); #define _CURSES_PANEL_PANEL_SET_USERPTR_METHODDEF \ - {"set_userptr", _PyCFunction_CAST(_curses_panel_panel_set_userptr), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_set_userptr__doc__}, + {"set_userptr", (PyCFunction)_curses_panel_panel_set_userptr, METH_O, _curses_panel_panel_set_userptr__doc__}, static PyObject * _curses_panel_panel_set_userptr_impl(PyCursesPanelObject *self, - PyTypeObject *cls, PyObject *obj); + PyObject *obj); static PyObject * -_curses_panel_panel_set_userptr(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_set_userptr(PyObject *self, PyObject *obj) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) - #else - # define KWTUPLE NULL - #endif - - static const char * const _keywords[] = {"", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "set_userptr", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; - PyObject *obj; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - obj = args[0]; - return_value = _curses_panel_panel_set_userptr_impl((PyCursesPanelObject *)self, cls, obj); -exit: + return_value = _curses_panel_panel_set_userptr_impl((PyCursesPanelObject *)self, obj); + return return_value; } @@ -324,20 +246,15 @@ PyDoc_STRVAR(_curses_panel_panel_userptr__doc__, "Return the user pointer for the panel."); #define _CURSES_PANEL_PANEL_USERPTR_METHODDEF \ - {"userptr", _PyCFunction_CAST(_curses_panel_panel_userptr), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _curses_panel_panel_userptr__doc__}, + {"userptr", (PyCFunction)_curses_panel_panel_userptr, METH_NOARGS, _curses_panel_panel_userptr__doc__}, static PyObject * -_curses_panel_panel_userptr_impl(PyCursesPanelObject *self, - PyTypeObject *cls); +_curses_panel_panel_userptr_impl(PyCursesPanelObject *self); static PyObject * -_curses_panel_panel_userptr(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_curses_panel_panel_userptr(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "userptr() takes no arguments"); - return NULL; - } - return _curses_panel_panel_userptr_impl((PyCursesPanelObject *)self, cls); + return _curses_panel_panel_userptr_impl((PyCursesPanelObject *)self); } PyDoc_STRVAR(_curses_panel_bottom_panel__doc__, @@ -424,4 +341,4 @@ _curses_panel_update_panels(PyObject *module, PyObject *Py_UNUSED(ignored)) { return _curses_panel_update_panels_impl(module); } -/*[clinic end generated code: output=36853ecb4a979814 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=db2fe491582784aa input=a9049054013a1b77]*/ From bcb6b45cb86a2f9f65b6c41f27c36059ba86a50b Mon Sep 17 00:00:00 2001 From: "Jiucheng(Oliver)" Date: Sun, 8 Jun 2025 03:13:21 -0400 Subject: [PATCH 506/989] gh-134151 Fix `TypeError` in `email.utils.decode_params` when sorting RFC 2231 continuations (#134687) - Fix sorting logic in `email.utils.decode_params` to handle None values. - Update tests for RFC 2231 continuation sorting. --- Lib/email/utils.py | 10 ++++++++-- Lib/test/test_email/test_email.py | 18 ++++++++++++++++++ ...5-05-25-23-23-05.gh-issue-134151.13Wwsb.rst | 2 ++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-25-23-23-05.gh-issue-134151.13Wwsb.rst diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 7eab74dc0db9df..3de1f0d24a15b0 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -417,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 7b14305f997e5d..8765d121fd0813 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -389,6 +389,24 @@ def test_bad_param(self): msg = email.message_from_string("Content-Type: blarg; baz; boo\n") self.assertEqual(msg.get_param('baz'), '') + def test_continuation_sorting_part_order(self): + msg = email.message_from_string( + "Content-Disposition: attachment; " + "filename*=\"ignored\"; " + "filename*0*=\"utf-8''foo%20\"; " + "filename*1*=\"bar.txt\"\n" + ) + filename = msg.get_filename() + self.assertEqual(filename, 'foo bar.txt') + + def test_sorting_no_continuations(self): + msg = email.message_from_string( + "Content-Disposition: attachment; " + "filename*=\"bar.txt\"; " + ) + filename = msg.get_filename() + self.assertEqual(filename, 'bar.txt') + def test_missing_filename(self): msg = email.message_from_string("From: foo\n") self.assertEqual(msg.get_filename(), None) diff --git a/Misc/NEWS.d/next/Library/2025-05-25-23-23-05.gh-issue-134151.13Wwsb.rst b/Misc/NEWS.d/next/Library/2025-05-25-23-23-05.gh-issue-134151.13Wwsb.rst new file mode 100644 index 00000000000000..ecdde240b4aadc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-25-23-23-05.gh-issue-134151.13Wwsb.rst @@ -0,0 +1,2 @@ +:mod:`email`: Fix :exc:`TypeError` in :func:`email.utils.decode_params` +when sorting :rfc:`2231` continuations that contain an unnumbered section. From 4372011928b43d369be727ed3eb6d9d4f9610660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:43:32 +0200 Subject: [PATCH 507/989] gh-134531: fix `_hashlib` clinic directive post GH-134626 (#135249) --- Modules/_hashopenssl.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 331275076d7937..42821ebe9f6a54 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -260,7 +260,7 @@ static PyModuleDef _hashlibmodule; typedef struct { PyTypeObject *HASH_type; // based on EVP_MD - PyTypeObject *HMACtype; + PyTypeObject *HMAC_type; #ifdef PY_OPENSSL_HAS_SHAKE PyTypeObject *HASHXOF_type; // based on EVP_MD #endif @@ -300,11 +300,11 @@ typedef struct { #include "clinic/_hashopenssl.c.h" /*[clinic input] module _hashlib -class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPtype" -class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->EVPXOFtype" -class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMACtype" +class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASH_type" +class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASHXOF_type" +class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMAC_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4f6b8873ed13d1ff]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=eb805ce4b90b1b31]*/ /* LCOV_EXCL_START */ @@ -1643,7 +1643,7 @@ _hashlib_hmac_new_impl(PyObject *module, Py_buffer *key, PyObject *msg_obj, } _hashlibstate *state = get_hashlib_state(module); - self = PyObject_New(HMACobject, state->HMACtype); + self = PyObject_New(HMACobject, state->HMAC_type); if (self == NULL) { goto error; } @@ -2204,7 +2204,7 @@ hashlib_traverse(PyObject *m, visitproc visit, void *arg) { _hashlibstate *state = get_hashlib_state(m); Py_VISIT(state->HASH_type); - Py_VISIT(state->HMACtype); + Py_VISIT(state->HMAC_type); #ifdef PY_OPENSSL_HAS_SHAKE Py_VISIT(state->HASHXOF_type); #endif @@ -2218,7 +2218,7 @@ hashlib_clear(PyObject *m) { _hashlibstate *state = get_hashlib_state(m); Py_CLEAR(state->HASH_type); - Py_CLEAR(state->HMACtype); + Py_CLEAR(state->HMAC_type); #ifdef PY_OPENSSL_HAS_SHAKE Py_CLEAR(state->HASHXOF_type); #endif @@ -2296,11 +2296,11 @@ hashlib_init_hmactype(PyObject *module) { _hashlibstate *state = get_hashlib_state(module); - state->HMACtype = (PyTypeObject *)PyType_FromSpec(&HMACtype_spec); - if (state->HMACtype == NULL) { + state->HMAC_type = (PyTypeObject *)PyType_FromSpec(&HMACtype_spec); + if (state->HMAC_type == NULL) { return -1; } - if (PyModule_AddType(module, state->HMACtype) < 0) { + if (PyModule_AddType(module, state->HMAC_type) < 0) { return -1; } return 0; From 1cb716387255a7bdab5b580bcf8ac1b6fa32cc41 Mon Sep 17 00:00:00 2001 From: LamentXU <108666168+LamentXU123@users.noreply.github.com> Date: Sun, 8 Jun 2025 19:46:16 +0800 Subject: [PATCH 508/989] =?UTF-8?q?gh-135244:=20generate=20UUID=20random?= =?UTF-8?q?=20Node=20ID=20with=20a=20CSPRNG=20as=20per=20RFC=209562,=20?= =?UTF-8?q?=C2=A76.10.3=20(#135226)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This aligns with the recommendations of RFC 9562, Section 6.10, paragraph 3 [1]. [1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-6.10-3. --------- Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/uuid.rst | 4 ++++ Lib/uuid.py | 14 ++++++++------ .../2025-06-08-10-22-22.gh-issue-135244.Y2SOTJ.rst | 4 ++++ 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-08-10-22-22.gh-issue-135244.Y2SOTJ.rst diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 8cce6b98cbcdb3..747ee3ee0e1951 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -257,6 +257,10 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. + By default, *a*, *b* and *c* are generated by a non-cryptographically + secure pseudo-random number generator (CSPRNG). Use :func:`uuid4` when + a UUID needs to be used in a security-sensitive context. + .. versionadded:: 3.14 diff --git a/Lib/uuid.py b/Lib/uuid.py index 06f81a7c338372..313f2fc46cb346 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -656,18 +656,20 @@ def _windll_getnode(): def _random_getnode(): """Get a random node ID.""" - # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or - # pseudo-randomly generated value may be used; see Section 4.5. The - # multicast bit must be set in such addresses, in order that they will - # never conflict with addresses obtained from network cards." + # RFC 9562, §6.10-3 says that + # + # Implementations MAY elect to obtain a 48-bit cryptographic-quality + # random number as per Section 6.9 to use as the Node ID. [...] [and] + # implementations MUST set the least significant bit of the first octet + # of the Node ID to 1. This bit is the unicast or multicast bit, which + # will never be set in IEEE 802 addresses obtained from network cards. # # The "multicast bit" of a MAC address is defined to be "the least # significant bit of the first octet". This works out to be the 41st bit # counting from 1 being the least significant bit, or 1<<40. # # See https://en.wikipedia.org/w/index.php?title=MAC_address&oldid=1128764812#Universal_vs._local_(U/L_bit) - import random - return random.getrandbits(48) | (1 << 40) + return int.from_bytes(os.urandom(6)) | (1 << 40) # _OS_GETTERS, when known, are targeted for a specific OS or platform. diff --git a/Misc/NEWS.d/next/Library/2025-06-08-10-22-22.gh-issue-135244.Y2SOTJ.rst b/Misc/NEWS.d/next/Library/2025-06-08-10-22-22.gh-issue-135244.Y2SOTJ.rst new file mode 100644 index 00000000000000..1f70358e64e2a0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-08-10-22-22.gh-issue-135244.Y2SOTJ.rst @@ -0,0 +1,4 @@ +:mod:`uuid`: when the MAC address cannot be determined, the 48-bit node +ID is now generated with a cryptographically-secure pseudo-random number +generator (CSPRNG) as per :rfc:`RFC 9562, §6.10.3 <9562#section-6.10-3>`. +This affects :func:`~uuid.uuid1` and :func:`~uuid.uuid6`. From 158e5162bfaa8a49178ce2c3f2455c3e03b60157 Mon Sep 17 00:00:00 2001 From: Yongzi Li <204532581+Yzi-Li@users.noreply.github.com> Date: Sun, 8 Jun 2025 20:28:55 +0800 Subject: [PATCH 509/989] gh-134976: document the exception type that can be raised by `s[i]` (#134977) --- Doc/library/stdtypes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index f0b4b09ff10dce..b75e5ceecf874e 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1018,7 +1018,7 @@ operations have the same priority as the corresponding numeric operations. [3]_ | ``s * n`` or | equivalent to adding *s* to | (2)(7) | | ``n * s`` | itself *n* times | | +--------------------------+--------------------------------+----------+ -| ``s[i]`` | *i*\ th item of *s*, origin 0 | \(3) | +| ``s[i]`` | *i*\ th item of *s*, origin 0 | (3)(9) | +--------------------------+--------------------------------+----------+ | ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) | +--------------------------+--------------------------------+----------+ @@ -1150,6 +1150,9 @@ Notes: without copying any data and with the returned index being relative to the start of the sequence rather than the start of the slice. +(9) + An :exc:`IndexError` is raised if *i* is outside the sequence range. + .. _typesseq-immutable: From aee45fd03f23c29aac57ecf747f9f1ee52235e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Jun 2025 14:34:57 +0200 Subject: [PATCH 510/989] gh-134531: refactor `_hashlib` logic for handling NIDs and EVP_MDs (#135254) --- Modules/_hashopenssl.c | 168 ++++++++++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 62 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 42821ebe9f6a54..50cf3c57491049 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -368,41 +368,83 @@ notify_ssl_error_occurred(void) } /* LCOV_EXCL_STOP */ -static const char * -get_openssl_evp_md_utf8name(const EVP_MD *md) -{ - assert(md != NULL); - int nid = EVP_MD_nid(md); - const char *name = NULL; - const py_hashentry_t *h; +/* + * OpenSSL provides a way to go from NIDs to digest names for hash functions + * but lacks this granularity for MAC objects where it is not possible to get + * the underlying digest name (only the block size and digest size are allowed + * to be recovered). + * + * In addition, OpenSSL aliases pollute the list of known digest names + * as OpenSSL appears to have its own definition of alias. In particular, + * the resulting list still contains duplicate and alternate names for several + * algorithms. + * + * Therefore, digest names, whether they are used by hash functions or HMAC, + * are handled through EVP_MD objects or directly by using some NID. + */ - for (h = py_hashes; h->py_name != NULL; h++) { +/* Get a cached entry by OpenSSL NID. */ +static const py_hashentry_t * +get_hashentry_by_nid(int nid) +{ + for (const py_hashentry_t *h = py_hashes; h->py_name != NULL; h++) { if (h->ossl_nid == nid) { - name = h->py_name; - break; + return h; } } + return NULL; +} + +/* + * Convert the NID to a string via OBJ_nid2*() functions. + * + * If 'nid' cannot be resolved, set an exception and return NULL. + */ +static const char * +get_asn1_utf8name_by_nid(int nid) +{ + const char *name = OBJ_nid2ln(nid); if (name == NULL) { - /* Ignore aliased names and only use long, lowercase name. The aliases - * pollute the list and OpenSSL appears to have its own definition of - * alias as the resulting list still contains duplicate and alternate - * names for several algorithms. - */ - name = OBJ_nid2ln(nid); - if (name == NULL) - name = OBJ_nid2sn(nid); + // In OpenSSL 3.0 and later, OBJ_nid*() are thread-safe and may raise. + assert(ERR_peek_last_error() != 0); + if (ERR_GET_REASON(ERR_peek_last_error()) != OBJ_R_UNKNOWN_NID) { + notify_ssl_error_occurred(); + return NULL; + } + // fallback to short name and unconditionally propagate errors + name = OBJ_nid2sn(nid); + if (name == NULL) { + raise_ssl_error(PyExc_ValueError, "cannot resolve NID %d", nid); + } } return name; } -static PyObject * -get_openssl_evp_md_name(const EVP_MD *md) +/* + * Convert the NID to an OpenSSL digest name. + * + * On error, set an exception and return NULL. + */ +static const char * +get_hashlib_utf8name_by_nid(int nid) +{ + const py_hashentry_t *e = get_hashentry_by_nid(nid); + return e ? e->py_name : get_asn1_utf8name_by_nid(nid); +} + +/* Same as get_hashlib_utf8name_by_nid() but using an EVP_MD object. */ +static const char * +get_hashlib_utf8name_by_evp_md(const EVP_MD *md) { - const char *name = get_openssl_evp_md_utf8name(md); - return PyUnicode_FromString(name); + assert(md != NULL); + return get_hashlib_utf8name_by_nid(EVP_MD_nid(md)); } -/* Get EVP_MD by HID and purpose */ +/* + * Get a new reference to an EVP_MD object described by name and purpose. + * + * If 'name' is an OpenSSL indexed name, the return value is cached. + */ static PY_EVP_MD * get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, Py_hash_type py_ht) @@ -471,42 +513,46 @@ get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, return digest; } -/* Get digest EVP_MD from object +/* + * Raise an exception indicating that 'digestmod' is not supported. + */ +static void +raise_unsupported_digestmod_error(PyObject *module, PyObject *digestmod) +{ + _hashlibstate *state = get_hashlib_state(module); + PyErr_Format(state->unsupported_digestmod_error, + "Unsupported digestmod %R", digestmod); +} + +/* + * Get a new reference to an EVP_MD described by 'digestmod' and purpose. + * + * On error, set an exception and return NULL. * - * * string - * * _hashopenssl builtin function + * Parameters * - * on error returns NULL with exception set. + * digestmod A digest name or a _hashopenssl builtin function + * py_ht The message digest purpose. */ static PY_EVP_MD * -get_openssl_evp_md(PyObject *module, PyObject *digestmod, - Py_hash_type py_ht) +get_openssl_evp_md(PyObject *module, PyObject *digestmod, Py_hash_type py_ht) { - PyObject *name_obj = NULL; const char *name; - if (PyUnicode_Check(digestmod)) { - name_obj = digestmod; - } else { - _hashlibstate *state = get_hashlib_state(module); - // borrowed ref - name_obj = PyDict_GetItemWithError(state->constructs, digestmod); + name = PyUnicode_AsUTF8(digestmod); } - if (name_obj == NULL) { - if (!PyErr_Occurred()) { - _hashlibstate *state = get_hashlib_state(module); - PyErr_Format( - state->unsupported_digestmod_error, - "Unsupported digestmod %R", digestmod); - } - return NULL; + else { + PyObject *dict = get_hashlib_state(module)->constructs; + assert(dict != NULL); + PyObject *borrowed_ref = PyDict_GetItemWithError(dict, digestmod); + name = borrowed_ref == NULL ? NULL : PyUnicode_AsUTF8(borrowed_ref); } - - name = PyUnicode_AsUTF8(name_obj); if (name == NULL) { + if (!PyErr_Occurred()) { + raise_unsupported_digestmod_error(module, digestmod); + } return NULL; } - return get_openssl_evp_md_by_utf8name(module, name, py_ht); } @@ -745,7 +791,9 @@ _hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure)) notify_ssl_error_occurred(); return NULL; } - return get_openssl_evp_md_name(md); + const char *name = get_hashlib_utf8name_by_evp_md(md); + assert(name != NULL || PyErr_Occurred()); + return name == NULL ? NULL : PyUnicode_FromString(name); } static PyGetSetDef HASH_getsets[] = { @@ -1775,20 +1823,15 @@ _hmac_dealloc(PyObject *op) static PyObject * _hmac_repr(PyObject *op) { + const char *digest_name; HMACobject *self = HMACobject_CAST(op); const EVP_MD *md = _hashlib_hmac_get_md(self); - if (md == NULL) { - return NULL; - } - PyObject *digest_name = get_openssl_evp_md_name(md); + digest_name = md == NULL ? NULL : get_hashlib_utf8name_by_evp_md(md); if (digest_name == NULL) { + assert(PyErr_Occurred()); return NULL; } - PyObject *repr = PyUnicode_FromFormat( - "<%U HMAC object @ %p>", digest_name, self - ); - Py_DECREF(digest_name); - return repr; + return PyUnicode_FromFormat("<%s HMAC object @ %p>", digest_name, self); } /*[clinic input] @@ -1900,13 +1943,12 @@ _hashlib_hmac_get_name(PyObject *op, void *Py_UNUSED(closure)) if (md == NULL) { return NULL; } - PyObject *digest_name = get_openssl_evp_md_name(md); + const char *digest_name = get_hashlib_utf8name_by_evp_md(md); if (digest_name == NULL) { + assert(PyErr_Occurred()); return NULL; } - PyObject *name = PyUnicode_FromFormat("hmac-%U", digest_name); - Py_DECREF(digest_name); - return name; + return PyUnicode_FromFormat("hmac-%s", digest_name); } static PyMethodDef HMAC_methods[] = { @@ -1982,7 +2024,9 @@ _openssl_hash_name_mapper(const EVP_MD *md, const char *from, return; } - py_name = get_openssl_evp_md_name(md); + const char *name = get_hashlib_utf8name_by_evp_md(md); + assert(name != NULL || PyErr_Occurred()); + py_name = name == NULL ? NULL : PyUnicode_FromString(name); if (py_name == NULL) { state->error = 1; } else { From 254bdac71121ee8028967388104a3f083250211e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 8 Jun 2025 17:33:19 +0100 Subject: [PATCH 511/989] Update tutorial for new "Copy" button (#135007) Co-authored-by: Sergey B Kirpichev --- Doc/tutorial/introduction.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index cdb35da7bc95ba..9e06e03991bc96 100644 --- a/Doc/tutorial/introduction.rst +++ b/Doc/tutorial/introduction.rst @@ -13,10 +13,9 @@ end a multi-line command. .. only:: html - You can toggle the display of prompts and output by clicking on ``>>>`` - in the upper-right corner of an example box. If you hide the prompts - and output for an example, then you can easily copy and paste the input - lines into your interpreter. + You can use the "Copy" button (it appears in the upper-right corner + when hovering over or tapping a code example), which strips prompts + and omits output, to copy and paste the input lines into your interpreter. .. index:: single: # (hash); comment From aac22ea212849f8fffee9e05af7429c503d973ee Mon Sep 17 00:00:00 2001 From: Alper Date: Sun, 8 Jun 2025 09:55:12 -0700 Subject: [PATCH 512/989] Add compile_commands.json to .gitignore (#135111) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cdb0352e0a8836..7aa6272cf8e382 100644 --- a/.gitignore +++ b/.gitignore @@ -131,6 +131,7 @@ Tools/unicode/data/ /autom4te.cache /build/ /builddir/ +/compile_commands.json /config.cache /config.log /config.status From 8d6eb0c26276c4013346622580072908d46d2341 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 8 Jun 2025 15:20:20 -0400 Subject: [PATCH 513/989] gh-135276: Refresh `zipfile.Path` from zipp 3.23 (#135277) Apply changes from zipp 3.23 --- Lib/test/test_zipfile/_path/_test_params.py | 2 +- .../test_zipfile/_path/test_complexity.py | 2 - Lib/test/test_zipfile/_path/test_path.py | 22 ++++++++--- Lib/test/test_zipfile/_path/write-alpharep.py | 1 - Lib/zipfile/_path/__init__.py | 38 ++++++++++--------- Lib/zipfile/_path/_functools.py | 20 ++++++++++ Lib/zipfile/_path/glob.py | 1 - ...-06-08-14-50-34.gh-issue-135276.ZLUhV1.rst | 6 +++ 8 files changed, 64 insertions(+), 28 deletions(-) create mode 100644 Lib/zipfile/_path/_functools.py create mode 100644 Misc/NEWS.d/next/Library/2025-06-08-14-50-34.gh-issue-135276.ZLUhV1.rst diff --git a/Lib/test/test_zipfile/_path/_test_params.py b/Lib/test/test_zipfile/_path/_test_params.py index bc95b4ebf4a168..00a9eaf2f99c1a 100644 --- a/Lib/test/test_zipfile/_path/_test_params.py +++ b/Lib/test/test_zipfile/_path/_test_params.py @@ -1,5 +1,5 @@ -import types import functools +import types from ._itertools import always_iterable diff --git a/Lib/test/test_zipfile/_path/test_complexity.py b/Lib/test/test_zipfile/_path/test_complexity.py index b505dd7c376462..7c108fc6ab8191 100644 --- a/Lib/test/test_zipfile/_path/test_complexity.py +++ b/Lib/test/test_zipfile/_path/test_complexity.py @@ -8,10 +8,8 @@ from ._functools import compose from ._itertools import consume - from ._support import import_or_skip - big_o = import_or_skip('big_o') pytest = import_or_skip('pytest') diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index 0afabc0c6683c4..696134023a56b9 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -1,6 +1,6 @@ +import contextlib import io import itertools -import contextlib import pathlib import pickle import stat @@ -9,12 +9,11 @@ import zipfile import zipfile._path -from test.support.os_helper import temp_dir, FakePath +from test.support.os_helper import FakePath, temp_dir from ._functools import compose from ._itertools import Counter - -from ._test_params import parameterize, Invoked +from ._test_params import Invoked, parameterize class jaraco: @@ -193,10 +192,10 @@ def test_encoding_warnings(self, alpharep): """EncodingWarning must blame the read_text and open calls.""" assert sys.flags.warn_default_encoding root = zipfile.Path(alpharep) - with self.assertWarns(EncodingWarning) as wc: + with self.assertWarns(EncodingWarning) as wc: # noqa: F821 (astral-sh/ruff#13296) root.joinpath("a.txt").read_text() assert __file__ == wc.filename - with self.assertWarns(EncodingWarning) as wc: + with self.assertWarns(EncodingWarning) as wc: # noqa: F821 (astral-sh/ruff#13296) root.joinpath("a.txt").open("r").close() assert __file__ == wc.filename @@ -364,6 +363,17 @@ def test_root_name(self, alpharep): root = zipfile.Path(alpharep) assert root.name == 'alpharep.zip' == root.filename.name + @pass_alpharep + def test_root_on_disk(self, alpharep): + """ + The name/stem of the root should match the zipfile on disk. + + This condition must hold across platforms. + """ + root = zipfile.Path(self.zipfile_ondisk(alpharep)) + assert root.name == 'alpharep.zip' == root.filename.name + assert root.stem == 'alpharep' == root.filename.stem + @pass_alpharep def test_suffix(self, alpharep): """ diff --git a/Lib/test/test_zipfile/_path/write-alpharep.py b/Lib/test/test_zipfile/_path/write-alpharep.py index 48c09b537179fd..7418391abadde5 100644 --- a/Lib/test/test_zipfile/_path/write-alpharep.py +++ b/Lib/test/test_zipfile/_path/write-alpharep.py @@ -1,4 +1,3 @@ from . import test_path - __name__ == '__main__' and test_path.build_alpharep_fixture().extractall('alpharep') diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 5ae16ec970dda4..faae4c84cae5ed 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -7,19 +7,19 @@ for more detail. """ +import functools import io -import posixpath -import zipfile import itertools -import contextlib import pathlib +import posixpath import re import stat import sys +import zipfile +from ._functools import save_method_args from .glob import Translator - __all__ = ['Path'] @@ -86,13 +86,12 @@ class InitializedState: Mix-in to save the initialization state for pickling. """ + @save_method_args def __init__(self, *args, **kwargs): - self.__args = args - self.__kwargs = kwargs super().__init__(*args, **kwargs) def __getstate__(self): - return self.__args, self.__kwargs + return self._saved___init__.args, self._saved___init__.kwargs def __setstate__(self, state): args, kwargs = state @@ -181,22 +180,27 @@ class FastLookup(CompleteDirs): """ def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super().namelist() - return self.__names + return self._namelist + + @functools.cached_property + def _namelist(self): + return super().namelist() def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super()._name_set() - return self.__lookup + return self._name_set_prop + + @functools.cached_property + def _name_set_prop(self): + return super()._name_set() def _extract_text_encoding(encoding=None, *args, **kwargs): # compute stack level so that the caller of the caller sees any warning. is_pypy = sys.implementation.name == 'pypy' - stack_level = 3 + is_pypy + # PyPy no longer special cased after 7.3.19 (or maybe 7.3.18) + # See jaraco/zipp#143 + is_old_pypi = is_pypy and sys.pypy_version_info < (7, 3, 19) + stack_level = 3 + is_old_pypi return io.text_encoding(encoding, stack_level), args, kwargs @@ -351,7 +355,7 @@ def open(self, mode='r', *args, pwd=None, **kwargs): return io.TextIOWrapper(stream, encoding, *args, **kwargs) def _base(self): - return pathlib.PurePosixPath(self.at or self.root.filename) + return pathlib.PurePosixPath(self.at) if self.at else self.filename @property def name(self): diff --git a/Lib/zipfile/_path/_functools.py b/Lib/zipfile/_path/_functools.py new file mode 100644 index 00000000000000..7390be21873e4b --- /dev/null +++ b/Lib/zipfile/_path/_functools.py @@ -0,0 +1,20 @@ +import collections +import functools + + +# from jaraco.functools 4.0.2 +def save_method_args(method): + """ + Wrap a method such that when it is called, the args and kwargs are + saved on the method. + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') # noqa: PYI024 + + @functools.wraps(method) + def wrapper(self, /, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py index d7fe45a494717a..bd2839304b7db2 100644 --- a/Lib/zipfile/_path/glob.py +++ b/Lib/zipfile/_path/glob.py @@ -1,7 +1,6 @@ import os import re - _default_seps = os.sep + str(os.altsep) * bool(os.altsep) diff --git a/Misc/NEWS.d/next/Library/2025-06-08-14-50-34.gh-issue-135276.ZLUhV1.rst b/Misc/NEWS.d/next/Library/2025-06-08-14-50-34.gh-issue-135276.ZLUhV1.rst new file mode 100644 index 00000000000000..a8fbd48d08aa37 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-08-14-50-34.gh-issue-135276.ZLUhV1.rst @@ -0,0 +1,6 @@ +Synchronized zipfile.Path with zipp 3.23, including improved performance of +:meth:`zipfile.Path.open` for non-reading modes, rely on +:func:`functools.cached_property` to cache values on the instance. Rely on +``save_method_args`` to save the initialization method arguments. Fixed +``.name``, ``.stem`` and other basename-based properties on Windows when +working with a zipfile on disk. From 8d17a412da7e7d8412efc625d48dcb5eecea50b0 Mon Sep 17 00:00:00 2001 From: Vladyslav Lazoryk <80263725+lazorikv@users.noreply.github.com> Date: Sun, 8 Jun 2025 22:59:26 +0300 Subject: [PATCH 514/989] gh-135263: Fix typo in token.NAME documentation (#135275) --- Doc/library/token.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/token.rst b/Doc/library/token.rst index 1f92b5df4302bf..c228006d4c1e1d 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -51,7 +51,7 @@ The token constants are: .. data:: NAME Token value that indicates an :ref:`identifier `. - Note that keywords are also initially tokenized an ``NAME`` tokens. + Note that keywords are also initially tokenized as ``NAME`` tokens. .. data:: NUMBER From 2677dd017a033eaaad3b8e1e0eb5664a44e7e231 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Mon, 9 Jun 2025 00:28:37 +0200 Subject: [PATCH 515/989] Use f-strings in csv docs example (#135245) --- Doc/library/csv.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 5297be17bd708e..2e513bff651cd2 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -609,7 +609,7 @@ A slightly more advanced use of the reader --- catching and reporting errors:: for row in reader: print(row) except csv.Error as e: - sys.exit('file {}, line {}: {}'.format(filename, reader.line_num, e)) + sys.exit(f'file {filename}, line {reader.line_num}: {e}') And while the module doesn't directly support parsing strings, it can easily be done:: From 83b94e856e9b55e3ad00e272ccd5e80eafa94443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 9 Jun 2025 09:13:29 +0200 Subject: [PATCH 516/989] gh-135234: improve `_hashlib` exceptions when reporting an OpenSSL function failure (#135250) - Refactor `get_openssl_evp_md_by_utf8name` error branches. - Refactor `HASH.{digest,hexdigest}` computations. - Refactor `_hashlib_HASH_copy_locked` and `locked_HMAC_CTX_copy`. --- ...-06-08-11-11-07.gh-issue-135234.wJCdh0.rst | 3 + Modules/_hashopenssl.c | 318 +++++++++++------- 2 files changed, 199 insertions(+), 122 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-08-11-11-07.gh-issue-135234.wJCdh0.rst diff --git a/Misc/NEWS.d/next/Library/2025-06-08-11-11-07.gh-issue-135234.wJCdh0.rst b/Misc/NEWS.d/next/Library/2025-06-08-11-11-07.gh-issue-135234.wJCdh0.rst new file mode 100644 index 00000000000000..e1c11e46735f0b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-08-11-11-07.gh-issue-135234.wJCdh0.rst @@ -0,0 +1,3 @@ +:mod:`hashlib`: improve exception messages when an OpenSSL function failed. +When memory allocation fails on OpenSSL's side, a :exc:`MemoryError` is +raised instead of a :exc:`ValueError`. Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 50cf3c57491049..ce9603d5db841f 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -311,8 +311,9 @@ class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module)) /* Set an exception of given type using the given OpenSSL error code. */ static void -set_ssl_exception_from_errcode(PyObject *exc, unsigned long errcode) +set_ssl_exception_from_errcode(PyObject *exc_type, unsigned long errcode) { + assert(exc_type != NULL); assert(errcode != 0); /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */ @@ -321,13 +322,29 @@ set_ssl_exception_from_errcode(PyObject *exc, unsigned long errcode) const char *reason = ERR_reason_error_string(errcode); if (lib && func) { - PyErr_Format(exc, "[%s: %s] %s", lib, func, reason); + PyErr_Format(exc_type, "[%s: %s] %s", lib, func, reason); } else if (lib) { - PyErr_Format(exc, "[%s] %s", lib, reason); + PyErr_Format(exc_type, "[%s] %s", lib, reason); } else { - PyErr_SetString(exc, reason); + PyErr_SetString(exc_type, reason); + } +} + +/* + * Get an appropriate exception type for the given OpenSSL error code. + * + * The exception type depends on the error code reason. + */ +static PyObject * +get_smart_ssl_exception_type(unsigned long errcode, PyObject *default_exc_type) +{ + switch (ERR_GET_REASON(errcode)) { + case ERR_R_MALLOC_FAILURE: + return PyExc_MemoryError; + default: + return default_exc_type; } } @@ -335,36 +352,83 @@ set_ssl_exception_from_errcode(PyObject *exc, unsigned long errcode) * Set an exception of given type. * * By default, the exception's message is constructed by using the last SSL - * error that occurred. If no error occurred, the 'fallback_format' is used - * to create a C-style formatted fallback message. + * error that occurred. If no error occurred, the 'fallback_message' is used + * to create an exception message. */ static void -raise_ssl_error(PyObject *exc, const char *fallback_format, ...) +raise_ssl_error(PyObject *exc_type, const char *fallback_message) +{ + assert(fallback_message != NULL); + unsigned long errcode = ERR_peek_last_error(); + if (errcode) { + ERR_clear_error(); + set_ssl_exception_from_errcode(exc_type, errcode); + } + else { + PyErr_SetString(exc_type, fallback_message); + } +} + +/* Same as raise_ssl_error() but with a C-style formatted message. */ +static void +raise_ssl_error_f(PyObject *exc_type, const char *fallback_format, ...) { assert(fallback_format != NULL); unsigned long errcode = ERR_peek_last_error(); if (errcode) { ERR_clear_error(); - set_ssl_exception_from_errcode(exc, errcode); + set_ssl_exception_from_errcode(exc_type, errcode); } else { va_list vargs; va_start(vargs, fallback_format); - PyErr_FormatV(exc, fallback_format, vargs); + PyErr_FormatV(exc_type, fallback_format, vargs); + va_end(vargs); + } +} + +/* Same as raise_ssl_error_f() with smart exception types. */ +static void +raise_smart_ssl_error_f(PyObject *exc_type, const char *fallback_format, ...) +{ + unsigned long errcode = ERR_peek_last_error(); + if (errcode) { + ERR_clear_error(); + exc_type = get_smart_ssl_exception_type(errcode, exc_type); + set_ssl_exception_from_errcode(exc_type, errcode); + } + else { + va_list vargs; + va_start(vargs, fallback_format); + PyErr_FormatV(exc_type, fallback_format, vargs); va_end(vargs); } } /* - * Set an exception with a generic default message after an error occurred. - * - * It can also be used without previous calls to SSL built-in functions, - * in which case a generic error message is provided. + * Raise a ValueError with a default message after an error occurred. + * It can also be used without previous calls to SSL built-in functions. */ static inline void -notify_ssl_error_occurred(void) +notify_ssl_error_occurred(const char *message) +{ + raise_ssl_error(PyExc_ValueError, message); +} + +/* Same as notify_ssl_error_occurred() for failed OpenSSL functions. */ +static inline void +notify_ssl_error_occurred_in(const char *funcname) { - raise_ssl_error(PyExc_ValueError, "no reason supplied"); + raise_ssl_error_f(PyExc_ValueError, + "error in OpenSSL function %s()", funcname); +} + +/* Same as notify_ssl_error_occurred_in() with smart exception types. */ +static inline void +notify_smart_ssl_error_occurred_in(const char *funcname) +{ + raise_smart_ssl_error_f(PyExc_ValueError, + "error in OpenSSL function %s()", funcname); } /* LCOV_EXCL_STOP */ @@ -408,16 +472,19 @@ get_asn1_utf8name_by_nid(int nid) // In OpenSSL 3.0 and later, OBJ_nid*() are thread-safe and may raise. assert(ERR_peek_last_error() != 0); if (ERR_GET_REASON(ERR_peek_last_error()) != OBJ_R_UNKNOWN_NID) { - notify_ssl_error_occurred(); - return NULL; + goto error; } // fallback to short name and unconditionally propagate errors name = OBJ_nid2sn(nid); if (name == NULL) { - raise_ssl_error(PyExc_ValueError, "cannot resolve NID %d", nid); + goto error; } } return name; + +error: + raise_ssl_error_f(PyExc_ValueError, "cannot resolve NID %d", nid); + return NULL; } /* @@ -449,8 +516,7 @@ static PY_EVP_MD * get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, Py_hash_type py_ht) { - PY_EVP_MD *digest = NULL; - PY_EVP_MD *other_digest = NULL; + PY_EVP_MD *digest = NULL, *other_digest = NULL; _hashlibstate *state = get_hashlib_state(module); py_hashentry_t *entry = (py_hashentry_t *)_Py_hashtable_get( state->hashtable, (const void*)name @@ -484,15 +550,16 @@ get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, #endif } break; + default: + goto invalid_hash_type; } // if another thread same thing at same time make sure we got same ptr assert(other_digest == NULL || other_digest == digest); - if (digest != NULL) { - if (other_digest == NULL) { - PY_EVP_MD_up_ref(digest); - } + if (digest != NULL && other_digest == NULL) { + PY_EVP_MD_up_ref(digest); } - } else { + } + else { // Fall back for looking up an unindexed OpenSSL specific name. switch (py_ht) { case Py_ht_evp: @@ -503,14 +570,21 @@ get_openssl_evp_md_by_utf8name(PyObject *module, const char *name, case Py_ht_evp_nosecurity: digest = PY_EVP_MD_fetch(name, "-fips"); break; + default: + goto invalid_hash_type; } } if (digest == NULL) { - raise_ssl_error(state->unsupported_digestmod_error, - "unsupported hash type %s", name); + raise_ssl_error_f(state->unsupported_digestmod_error, + "unsupported digest name: %s", name); return NULL; } return digest; + +invalid_hash_type: + assert(digest == NULL); + PyErr_Format(PyExc_SystemError, "unsupported hash type %d", py_ht); + return NULL; } /* @@ -556,6 +630,22 @@ get_openssl_evp_md(PyObject *module, PyObject *digestmod, Py_hash_type py_ht) return get_openssl_evp_md_by_utf8name(module, name, py_ht); } +// --- OpenSSL HASH wrappers -------------------------------------------------- + +/* Thin wrapper around EVP_MD_CTX_new() which sets an exception on failure. */ +static EVP_MD_CTX * +py_wrapper_EVP_MD_CTX_new(void) +{ + EVP_MD_CTX *ctx = EVP_MD_CTX_new(); + if (ctx == NULL) { + PyErr_NoMemory(); + return NULL; + } + return ctx; +} + +// --- HASH interface --------------------------------------------------------- + static HASHobject * new_hash_object(PyTypeObject *type) { @@ -565,10 +655,9 @@ new_hash_object(PyTypeObject *type) } HASHLIB_INIT_MUTEX(retval); - retval->ctx = EVP_MD_CTX_new(); + retval->ctx = py_wrapper_EVP_MD_CTX_new(); if (retval->ctx == NULL) { Py_DECREF(retval); - PyErr_NoMemory(); return NULL; } @@ -586,7 +675,7 @@ _hashlib_HASH_hash(HASHobject *self, const void *vp, Py_ssize_t len) else process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int); if (!EVP_DigestUpdate(self->ctx, (const void*)cp, process)) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestUpdate)); return -1; } len -= process; @@ -614,7 +703,11 @@ _hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p) ENTER_HASHLIB(self); result = EVP_MD_CTX_copy(new_ctx_p, self->ctx); LEAVE_HASHLIB(self); - return result; + if (result == 0) { + notify_smart_ssl_error_occurred_in(Py_STRINGIFY(EVP_MD_CTX_copy)); + return -1; + } + return 0; } /* External methods for a hash object */ @@ -634,14 +727,36 @@ _hashlib_HASH_copy_impl(HASHobject *self) if ((newobj = new_hash_object(Py_TYPE(self))) == NULL) return NULL; - if (!_hashlib_HASH_copy_locked(self, newobj->ctx)) { + if (_hashlib_HASH_copy_locked(self, newobj->ctx) < 0) { Py_DECREF(newobj); - notify_ssl_error_occurred(); return NULL; } return (PyObject *)newobj; } +static Py_ssize_t +_hashlib_HASH_digest_compute(HASHobject *self, unsigned char *digest) +{ + EVP_MD_CTX *ctx = py_wrapper_EVP_MD_CTX_new(); + if (ctx == NULL) { + return -1; + } + if (_hashlib_HASH_copy_locked(self, ctx) < 0) { + goto error; + } + Py_ssize_t digest_size = EVP_MD_CTX_size(ctx); + if (!EVP_DigestFinal(ctx, digest, NULL)) { + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestFinal)); + goto error; + } + EVP_MD_CTX_free(ctx); + return digest_size; + +error: + EVP_MD_CTX_free(ctx); + return -1; +} + /*[clinic input] _hashlib.HASH.digest @@ -653,32 +768,8 @@ _hashlib_HASH_digest_impl(HASHobject *self) /*[clinic end generated code: output=3fc6f9671d712850 input=d8d528d6e50af0de]*/ { unsigned char digest[EVP_MAX_MD_SIZE]; - EVP_MD_CTX *temp_ctx; - PyObject *retval; - unsigned int digest_size; - - temp_ctx = EVP_MD_CTX_new(); - if (temp_ctx == NULL) { - PyErr_NoMemory(); - return NULL; - } - - if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { - goto error; - } - digest_size = EVP_MD_CTX_size(temp_ctx); - if (!EVP_DigestFinal(temp_ctx, digest, NULL)) { - goto error; - } - - retval = PyBytes_FromStringAndSize((const char *)digest, digest_size); - EVP_MD_CTX_free(temp_ctx); - return retval; - -error: - EVP_MD_CTX_free(temp_ctx); - notify_ssl_error_occurred(); - return NULL; + Py_ssize_t n = _hashlib_HASH_digest_compute(self, digest); + return n < 0 ? NULL : PyBytes_FromStringAndSize((const char *)digest, n); } /*[clinic input] @@ -692,32 +783,8 @@ _hashlib_HASH_hexdigest_impl(HASHobject *self) /*[clinic end generated code: output=1b8e60d9711e7f4d input=ae7553f78f8372d8]*/ { unsigned char digest[EVP_MAX_MD_SIZE]; - EVP_MD_CTX *temp_ctx; - unsigned int digest_size; - - temp_ctx = EVP_MD_CTX_new(); - if (temp_ctx == NULL) { - PyErr_NoMemory(); - return NULL; - } - - /* Get the raw (binary) digest value */ - if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { - goto error; - } - digest_size = EVP_MD_CTX_size(temp_ctx); - if (!EVP_DigestFinal(temp_ctx, digest, NULL)) { - goto error; - } - - EVP_MD_CTX_free(temp_ctx); - - return _Py_strhex((const char *)digest, (Py_ssize_t)digest_size); - -error: - EVP_MD_CTX_free(temp_ctx); - notify_ssl_error_occurred(); - return NULL; + Py_ssize_t n = _hashlib_HASH_digest_compute(self, digest); + return n < 0 ? NULL : _Py_strhex((const char *)digest, n); } /*[clinic input] @@ -788,7 +855,7 @@ _hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure)) HASHobject *self = HASHobject_CAST(op); const EVP_MD *md = EVP_MD_CTX_md(self->ctx); if (md == NULL) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred("missing EVP_MD for HASH context"); return NULL; } const char *name = get_hashlib_utf8name_by_evp_md(md); @@ -877,20 +944,20 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) return NULL; } - temp_ctx = EVP_MD_CTX_new(); + temp_ctx = py_wrapper_EVP_MD_CTX_new(); if (temp_ctx == NULL) { Py_DECREF(retval); - PyErr_NoMemory(); return NULL; } - if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { + if (_hashlib_HASH_copy_locked(self, temp_ctx) < 0) { goto error; } if (!EVP_DigestFinalXOF(temp_ctx, (unsigned char*)PyBytes_AS_STRING(retval), length)) { + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestFinalXOF)); goto error; } @@ -900,7 +967,6 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) error: Py_DECREF(retval); EVP_MD_CTX_free(temp_ctx); - notify_ssl_error_occurred(); return NULL; } @@ -926,18 +992,18 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) return NULL; } - temp_ctx = EVP_MD_CTX_new(); + temp_ctx = py_wrapper_EVP_MD_CTX_new(); if (temp_ctx == NULL) { PyMem_Free(digest); - PyErr_NoMemory(); return NULL; } /* Get the raw (binary) digest value */ - if (!_hashlib_HASH_copy_locked(self, temp_ctx)) { + if (_hashlib_HASH_copy_locked(self, temp_ctx) < 0) { goto error; } if (!EVP_DigestFinalXOF(temp_ctx, digest, length)) { + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestFinalXOF)); goto error; } @@ -950,7 +1016,6 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) error: PyMem_Free(digest); EVP_MD_CTX_free(temp_ctx); - notify_ssl_error_occurred(); return NULL; } @@ -1054,7 +1119,7 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, int result = EVP_DigestInit_ex(self->ctx, digest, NULL); if (!result) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_DigestInit_ex)); Py_CLEAR(self); goto exit; } @@ -1463,7 +1528,7 @@ pbkdf2_hmac_impl(PyObject *module, const char *hash_name, if (!retval) { Py_CLEAR(key_obj); - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(PKCS5_PBKDF2_HMAC)); goto end; } @@ -1539,8 +1604,8 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, /* let OpenSSL validate the rest */ retval = EVP_PBE_scrypt(NULL, 0, NULL, 0, n, r, p, maxmem, NULL, 0); if (!retval) { - raise_ssl_error(PyExc_ValueError, - "Invalid parameter combination for n, r, p, maxmem."); + notify_ssl_error_occurred( + "Invalid parameter combination for n, r, p, maxmem."); return NULL; } @@ -1561,7 +1626,7 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, if (!retval) { Py_CLEAR(key_obj); - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_PBE_scrypt)); return NULL; } return key_obj; @@ -1618,7 +1683,7 @@ _hashlib_hmac_singleshot_impl(PyObject *module, Py_buffer *key, PY_EVP_MD_free(evp); if (result == NULL) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(HMAC)); return NULL; } return PyBytes_FromStringAndSize((const char*)md, md_len); @@ -1627,6 +1692,18 @@ _hashlib_hmac_singleshot_impl(PyObject *module, Py_buffer *key, /* OpenSSL-based HMAC implementation */ +/* Thin wrapper around HMAC_CTX_new() which sets an exception on failure. */ +static HMAC_CTX * +py_openssl_wrapper_HMAC_CTX_new(void) +{ + HMAC_CTX *ctx = HMAC_CTX_new(); + if (ctx == NULL) { + PyErr_NoMemory(); + return NULL; + } + return ctx; +} + static int _hmac_update(HMACobject*, PyObject*); static const EVP_MD * @@ -1634,7 +1711,7 @@ _hashlib_hmac_get_md(HMACobject *self) { const EVP_MD *md = HMAC_CTX_get_md(self->ctx); if (md == NULL) { - raise_ssl_error(PyExc_ValueError, "missing EVP_MD for HMAC context"); + notify_ssl_error_occurred("missing EVP_MD for HMAC context"); } return md; } @@ -1676,17 +1753,16 @@ _hashlib_hmac_new_impl(PyObject *module, Py_buffer *key, PyObject *msg_obj, return NULL; } - ctx = HMAC_CTX_new(); + ctx = py_openssl_wrapper_HMAC_CTX_new(); if (ctx == NULL) { PY_EVP_MD_free(digest); - PyErr_NoMemory(); goto error; } r = HMAC_Init_ex(ctx, key->buf, (int)key->len, digest, NULL /* impl */); PY_EVP_MD_free(digest); if (r == 0) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(HMAC_Init_ex)); goto error; } @@ -1721,7 +1797,11 @@ locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self) ENTER_HASHLIB(self); result = HMAC_CTX_copy(new_ctx_p, self->ctx); LEAVE_HASHLIB(self); - return result; + if (result == 0) { + notify_smart_ssl_error_occurred_in(Py_STRINGIFY(HMAC_CTX_copy)); + return -1; + } + return 0; } /* returning 0 means that an error occurred and an exception is set */ @@ -1735,7 +1815,7 @@ _hashlib_hmac_digest_size(HMACobject *self) unsigned int digest_size = EVP_MD_size(md); assert(digest_size <= EVP_MAX_MD_SIZE); if (digest_size == 0) { - raise_ssl_error(PyExc_ValueError, "invalid digest size"); + notify_ssl_error_occurred("invalid digest size"); } return digest_size; } @@ -1768,7 +1848,7 @@ _hmac_update(HMACobject *self, PyObject *obj) PyBuffer_Release(&view); if (r == 0) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(HMAC_Update)); return 0; } return 1; @@ -1786,13 +1866,12 @@ _hashlib_HMAC_copy_impl(HMACobject *self) { HMACobject *retval; - HMAC_CTX *ctx = HMAC_CTX_new(); + HMAC_CTX *ctx = py_openssl_wrapper_HMAC_CTX_new(); if (ctx == NULL) { - return PyErr_NoMemory(); + return NULL; } - if (!locked_HMAC_CTX_copy(ctx, self)) { + if (locked_HMAC_CTX_copy(ctx, self) < 0) { HMAC_CTX_free(ctx); - notify_ssl_error_occurred(); return NULL; } @@ -1854,20 +1933,18 @@ _hashlib_HMAC_update_impl(HMACobject *self, PyObject *msg) static int _hmac_digest(HMACobject *self, unsigned char *buf, unsigned int len) { - HMAC_CTX *temp_ctx = HMAC_CTX_new(); + HMAC_CTX *temp_ctx = py_openssl_wrapper_HMAC_CTX_new(); if (temp_ctx == NULL) { - (void)PyErr_NoMemory(); return 0; } - if (!locked_HMAC_CTX_copy(temp_ctx, self)) { + if (locked_HMAC_CTX_copy(temp_ctx, self) < 0) { HMAC_CTX_free(temp_ctx); - notify_ssl_error_occurred(); return 0; } int r = HMAC_Final(temp_ctx, buf, &len); HMAC_CTX_free(temp_ctx); if (r == 0) { - notify_ssl_error_occurred(); + notify_ssl_error_occurred_in(Py_STRINGIFY(HMAC_Final)); return 0; } return 1; @@ -2088,16 +2165,13 @@ _hashlib_get_fips_mode_impl(PyObject *module) #else ERR_clear_error(); int result = FIPS_mode(); - if (result == 0) { + if (result == 0 && ERR_peek_last_error()) { // "If the library was built without support of the FIPS Object Module, // then the function will return 0 with an error code of // CRYPTO_R_FIPS_MODE_NOT_SUPPORTED (0x0f06d065)." // But 0 is also a valid result value. - unsigned long errcode = ERR_peek_last_error(); - if (errcode) { - notify_ssl_error_occurred(); - return -1; - } + notify_ssl_error_occurred_in(Py_STRINGIFY(FIPS_mode)); + return -1; } return result; #endif From 3cb109796dd4b07625b21773bbc04697c65bdf20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 9 Jun 2025 09:17:43 +0200 Subject: [PATCH 517/989] gh-135004: rewrite and cleanup `blake2module.c` (#135006) * Cleanup imports and update module docstring. * Simplify detection of SIMD support. * Correctly guard `update()` cases. * Rewrite `py_blake2b_or_s_new` and rename it to `py_blake2_new`. * Rewrite `blake2_blake2b_copy_locked` and `py_blake2_clear`. * Refactor computations of `digest` and `hexdigest`. * Simplify `py_blake2b_get_name` and `py_blake2b_get_block_size`. * Add `hacl_get_blake2_info` to extract static BLAKE-2 information. This new helper is used by `py_blake2b_get_digest_size`, but can be later used to expose `key_length` more easily. --- ...-06-01-14-18-48.gh-issue-135004.cq3-fp.rst | 3 + Modules/blake2module.c | 725 +++++++++--------- Modules/hmacmodule.c | 4 +- 3 files changed, 364 insertions(+), 368 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-01-14-18-48.gh-issue-135004.cq3-fp.rst diff --git a/Misc/NEWS.d/next/Library/2025-06-01-14-18-48.gh-issue-135004.cq3-fp.rst b/Misc/NEWS.d/next/Library/2025-06-01-14-18-48.gh-issue-135004.cq3-fp.rst new file mode 100644 index 00000000000000..4c59b0f8e19e6b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-01-14-18-48.gh-issue-135004.cq3-fp.rst @@ -0,0 +1,3 @@ +Rewrite and cleanup the internal :mod:`!_blake2` module. Some exception +messages were changed but their types were left untouched. Patch by Bénédikt +Tran. diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 07aa89f573f05f..2ce8c0cd3d7b6f 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -13,7 +13,6 @@ # define Py_BUILD_CORE_MODULE 1 #endif -#include "pyconfig.h" #include "Python.h" #include "hashlib.h" #include "pycore_strhex.h" // _Py_strhex() @@ -51,96 +50,19 @@ # undef HACL_CAN_COMPILE_SIMD256 #endif -// ECX -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) - -// EBX -#define EBX_AVX2 (1 << 5) - -// EDX -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - -// zero-initialized by default -typedef struct { - bool sse, sse2, sse3, sse41, sse42, cmov, avx, avx2; - bool done; -} cpu_flags; - -void detect_cpu_features(cpu_flags *flags) { - if (!flags->done) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = { 0 }; - int info7[4] = { 0 }; - __cpuidex(info1, 1, 0); - __cpuidex(info7, 7, 0); - eax1 = info1[0]; - ebx1 = info1[1]; - ecx1 = info1[2]; - edx1 = info1[3]; - eax7 = info7[0]; - ebx7 = info7[1]; - ecx7 = info7[2]; - edx7 = info7[3]; -#endif - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; - - - flags->avx = (ecx1 & ECX_AVX) != 0; - - flags->avx2 = (ebx7 & EBX_AVX2) != 0; - - flags->sse = (edx1 & EDX_SSE) != 0; - flags->sse2 = (edx1 & EDX_SSE2) != 0; - flags->cmov = (edx1 & EDX_CMOV) != 0; - - flags->sse3 = (ecx1 & ECX_SSE3) != 0; - /* ssse3 = (ecx1 & ECX_SSSE3) != 0; */ - flags->sse41 = (ecx1 & ECX_SSE4_1) != 0; - flags->sse42 = (ecx1 & ECX_SSE4_2) != 0; - - flags->done = true; - } -} - -#ifdef HACL_CAN_COMPILE_SIMD128 -static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something - // else. - return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; -} -#endif - -#ifdef HACL_CAN_COMPILE_SIMD256 -static inline bool has_simd256(cpu_flags *flags) { - return flags->avx && flags->avx2; -} -#endif - // Small mismatch between the variable names Python defines as part of configure // at the ones HACL* expects to be set in order to enable those headers. #define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 #define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 -#include "_hacl/Hacl_Hash_Blake2b.h" #include "_hacl/Hacl_Hash_Blake2s.h" -#if HACL_CAN_COMPILE_SIMD256 -#include "_hacl/Hacl_Hash_Blake2b_Simd256.h" -#endif +#include "_hacl/Hacl_Hash_Blake2b.h" #if HACL_CAN_COMPILE_SIMD128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif +#if HACL_CAN_COMPILE_SIMD256 +#include "_hacl/Hacl_Hash_Blake2b_Simd256.h" +#endif // MODULE TYPE SLOTS @@ -148,16 +70,16 @@ static PyType_Spec blake2b_type_spec; static PyType_Spec blake2s_type_spec; PyDoc_STRVAR(blake2mod__doc__, -"_blake2b provides BLAKE2b for hashlib\n" -); + "_blake2 provides BLAKE2b and BLAKE2s for hashlib\n"); typedef struct { - PyTypeObject* blake2b_type; - PyTypeObject* blake2s_type; - cpu_flags flags; + PyTypeObject *blake2b_type; + PyTypeObject *blake2s_type; + bool can_run_simd128; + bool can_run_simd256; } Blake2State; -static inline Blake2State* +static inline Blake2State * blake2_get_state(PyObject *module) { void *state = _PyModule_GetState(module); @@ -166,7 +88,7 @@ blake2_get_state(PyObject *module) } #if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) -static inline Blake2State* +static inline Blake2State * blake2_get_state_from_type(PyTypeObject *module) { void *state = _PyType_GetModuleState(module); @@ -203,31 +125,107 @@ _blake2_free(void *module) (void)_blake2_clear((PyObject *)module); } -#define ADD_INT(d, name, value) do { \ - PyObject *x = PyLong_FromLong(value); \ - if (!x) \ - return -1; \ - if (PyDict_SetItemString(d, name, x) < 0) { \ - Py_DECREF(x); \ - return -1; \ - } \ - Py_DECREF(x); \ -} while(0) - -#define ADD_INT_CONST(NAME, VALUE) do { \ - if (PyModule_AddIntConstant(m, NAME, VALUE) < 0) { \ - return -1; \ - } \ -} while (0) +static void +blake2module_init_cpu_features(Blake2State *state) +{ + /* This must be kept in sync with hmacmodule_init_cpu_features() + * in hmacmodule.c */ + int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; + int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +#if defined(__x86_64__) && defined(__GNUC__) + __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); + __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); +#elif defined(_M_X64) + int info1[4] = {0}; + __cpuidex(info1, 1, 0); + eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; + + int info7[4] = {0}; + __cpuidex(info7, 7, 0); + eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; +#endif + // fmt: off + (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; + (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; + // fmt: on + +#define EBX_AVX2 (1 << 5) +#define ECX_SSE3 (1 << 0) +#define ECX_SSSE3 (1 << 9) +#define ECX_SSE4_1 (1 << 19) +#define ECX_SSE4_2 (1 << 20) +#define ECX_AVX (1 << 28) +#define EDX_SSE (1 << 25) +#define EDX_SSE2 (1 << 26) +#define EDX_CMOV (1 << 15) + + bool avx = (ecx1 & ECX_AVX) != 0; + bool avx2 = (ebx7 & EBX_AVX2) != 0; + + bool sse = (edx1 & EDX_SSE) != 0; + bool sse2 = (edx1 & EDX_SSE2) != 0; + bool cmov = (edx1 & EDX_CMOV) != 0; + + bool sse3 = (ecx1 & ECX_SSE3) != 0; + bool sse41 = (ecx1 & ECX_SSE4_1) != 0; + bool sse42 = (ecx1 & ECX_SSE4_2) != 0; + +#undef EDX_CMOV +#undef EDX_SSE2 +#undef EDX_SSE +#undef ECX_AVX +#undef ECX_SSE4_2 +#undef ECX_SSE4_1 +#undef ECX_SSSE3 +#undef ECX_SSE3 +#undef EBX_AVX2 + +#if HACL_CAN_COMPILE_SIMD128 + // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection + state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; +#else + // fmt: off + (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; + // fmt: on + state->can_run_simd128 = false; +#endif + +#if HACL_CAN_COMPILE_SIMD256 + // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection + state->can_run_simd256 = state->can_run_simd128 && avx && avx2; +#else + // fmt: off + (void)avx; (void)avx2; + // fmt: on + state->can_run_simd256 = false; +#endif +} static int blake2_exec(PyObject *m) { - Blake2State* st = blake2_get_state(m); - - // This is called at module initialization-time, and so appears to be as - // good a place as any to probe the CPU flags. - detect_cpu_features(&st->flags); + Blake2State *st = blake2_get_state(m); + blake2module_init_cpu_features(st); + +#define ADD_INT(DICT, NAME, VALUE) \ + do { \ + PyObject *x = PyLong_FromLong(VALUE); \ + if (x == NULL) { \ + return -1; \ + } \ + int rc = PyDict_SetItemString(DICT, NAME, x); \ + Py_DECREF(x); \ + if (rc < 0) { \ + return -1; \ + } \ + } while(0) + +#define ADD_INT_CONST(NAME, VALUE) \ + do { \ + if (PyModule_AddIntConstant(m, NAME, VALUE) < 0) { \ + return -1; \ + } \ + } while (0) ADD_INT_CONST("_GIL_MINSIZE", HASHLIB_GIL_MINSIZE); @@ -237,7 +235,6 @@ blake2_exec(PyObject *m) if (st->blake2b_type == NULL) { return -1; } - /* BLAKE2b */ if (PyModule_AddType(m, st->blake2b_type) < 0) { return -1; } @@ -257,9 +254,9 @@ blake2_exec(PyObject *m) st->blake2s_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2s_type_spec, NULL); - if (NULL == st->blake2s_type) + if (st->blake2s_type == NULL) { return -1; - + } if (PyModule_AddType(m, st->blake2s_type) < 0) { return -1; } @@ -275,12 +272,11 @@ blake2_exec(PyObject *m) ADD_INT_CONST("BLAKE2S_MAX_KEY_SIZE", HACL_HASH_BLAKE2S_KEY_BYTES); ADD_INT_CONST("BLAKE2S_MAX_DIGEST_SIZE", HACL_HASH_BLAKE2S_OUT_BYTES); +#undef ADD_INT_CONST +#undef ADD_INT return 0; } -#undef ADD_INT -#undef ADD_INT_CONST - static PyModuleDef_Slot _blake2_slots[] = { {Py_mod_exec, blake2_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, @@ -320,35 +316,39 @@ PyInit__blake2(void) // set. typedef enum { Blake2s, Blake2b, Blake2s_128, Blake2b_256 } blake2_impl; -static inline bool is_blake2b(blake2_impl impl) { - return impl == Blake2b || impl == Blake2b_256; +static inline bool +is_blake2b(blake2_impl impl) +{ + return impl == Blake2b || impl == Blake2b_256; } -static inline bool is_blake2s(blake2_impl impl) { - return !is_blake2b(impl); +static inline bool +is_blake2s(blake2_impl impl) +{ + return impl == Blake2s || impl == Blake2s_128; } -static inline blake2_impl type_to_impl(PyTypeObject *type) { +static inline blake2_impl +type_to_impl(PyTypeObject *type) +{ #if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) - Blake2State* st = blake2_get_state_from_type(type); + Blake2State *st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD256 - if (has_simd256(&st->flags)) - return Blake2b_256; - else -#endif +#if HACL_CAN_COMPILE_SIMD256 + return st->can_run_simd256 ? Blake2b_256 : Blake2b; +#else return Blake2b; - } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD128 - if (has_simd128(&st->flags)) - return Blake2s_128; - else #endif + } + else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { +#if HACL_CAN_COMPILE_SIMD128 + return st->can_run_simd128 ? Blake2s_128 : Blake2s; +#else return Blake2s; - } else { - Py_UNREACHABLE(); +#endif } + Py_UNREACHABLE(); } typedef struct { @@ -356,10 +356,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#ifdef HACL_CAN_COMPILE_SIMD128 +#if HACL_CAN_COMPILE_SIMD128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if HACL_CAN_COMPILE_SIMD256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -425,39 +425,124 @@ static void update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { - // These need to be ifdef'd out otherwise it's an unresolved symbol at - // link-time. -#ifdef HACL_CAN_COMPILE_SIMD256 + // blake2b_256_state and blake2s_128_state must be if'd since + // otherwise this results in an unresolved symbol at link-time. +#if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: - HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len); + HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update, + self->blake2b_256_state, buf, len); return; #endif -#ifdef HACL_CAN_COMPILE_SIMD128 +#if HACL_CAN_COMPILE_SIMD128 case Blake2s_128: - HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len); + HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update, + self->blake2s_128_state, buf, len); return; #endif case Blake2b: - HACL_UPDATE(Hacl_Hash_Blake2b_update,self->blake2b_state, buf, len); + HACL_UPDATE(Hacl_Hash_Blake2b_update, + self->blake2b_state, buf, len); return; case Blake2s: - HACL_UPDATE(Hacl_Hash_Blake2s_update,self->blake2s_state, buf, len); + HACL_UPDATE(Hacl_Hash_Blake2s_update, + self->blake2s_state, buf, len); return; default: Py_UNREACHABLE(); } } -static PyObject * -py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, - Py_buffer *key, Py_buffer *salt, Py_buffer *person, - int fanout, int depth, unsigned long leaf_size, - unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) +#define BLAKE2_IMPLNAME(SELF) \ + (is_blake2b((SELF)->impl) ? "blake2b" : "blake2s") +#define GET_BLAKE2_CONST(SELF, NAME) \ + (is_blake2b((SELF)->impl) \ + ? HACL_HASH_BLAKE2B_ ## NAME \ + : HACL_HASH_BLAKE2S_ ## NAME) + +#define MAX_OUT_BYTES(SELF) GET_BLAKE2_CONST(SELF, OUT_BYTES) +#define MAX_SALT_LENGTH(SELF) GET_BLAKE2_CONST(SELF, SALT_BYTES) +#define MAX_KEY_BYTES(SELF) GET_BLAKE2_CONST(SELF, KEY_BYTES) +#define MAX_PERSONAL_BYTES(SELF) GET_BLAKE2_CONST(SELF, PERSONAL_BYTES) +static int +py_blake2_validate_params(Blake2Object *self, + int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size) +{ + /* Validate digest size. */ + if (digest_size <= 0 || (unsigned int)digest_size > MAX_OUT_BYTES(self)) { + PyErr_Format( + PyExc_ValueError, + "digest_size for %s must be between 1 and %d bytes, got %d", + BLAKE2_IMPLNAME(self), MAX_OUT_BYTES(self), digest_size + ); + goto error; + } + +#define CHECK_LENGTH(NAME, VALUE, MAX) \ + do { \ + if ((size_t)(VALUE) > (size_t)(MAX)) { \ + PyErr_Format(PyExc_ValueError, \ + "maximum %s length is %zu bytes, got %zd", \ + (NAME), (size_t)(MAX), (Py_ssize_t)(VALUE)); \ + goto error; \ + } \ + } while (0) + /* Validate key parameter. */ + if (key->obj && key->len) { + CHECK_LENGTH("key", key->len, MAX_KEY_BYTES(self)); + } + /* Validate salt parameter. */ + if (salt->obj && salt->len) { + CHECK_LENGTH("salt", salt->len, MAX_SALT_LENGTH(self)); + } + /* Validate personalization parameter. */ + if (person->obj && person->len) { + CHECK_LENGTH("person", person->len, MAX_PERSONAL_BYTES(self)); + } +#undef CHECK_LENGTH +#define CHECK_TREE(NAME, VALUE, MIN, MAX) \ + do { \ + if ((VALUE) < (MIN) || (size_t)(VALUE) > (size_t)(MAX)) { \ + PyErr_Format(PyExc_ValueError, \ + "'%s' must be between %zu and %zu", \ + (NAME), (size_t)(MIN), (size_t)(MAX)); \ + goto error; \ + } \ + } while (0) + /* Validate tree parameters. */ + CHECK_TREE("fanout", fanout, 0, 255); + CHECK_TREE("depth", depth, 1, 255); + CHECK_TREE("node_depth", node_depth, 0, 255); + CHECK_TREE("inner_size", inner_size, 0, MAX_OUT_BYTES(self)); +#undef CHECK_TREE + if (leaf_size > 0xFFFFFFFFU) { + /* maximum: 2**32 - 1 */ + PyErr_SetString(PyExc_OverflowError, "'leaf_size' is too large"); + goto error; + } + if (is_blake2s(self->impl) && node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum: 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "'node_offset' is too large"); + goto error; + } + return 0; +error: + return -1; +} + + +static PyObject * +py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size, int last_node, int usedforsecurity) { Blake2Object *self = NULL; - Py_buffer buf; self = new_Blake2Object(type); if (self == NULL) { @@ -487,96 +572,31 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, default: Py_UNREACHABLE(); } - // Using Blake2b because we statically know that these are greater than the - // Blake2s sizes -- this avoids a VLA. - uint8_t salt_[HACL_HASH_BLAKE2B_SALT_BYTES] = { 0 }; - uint8_t personal_[HACL_HASH_BLAKE2B_PERSONAL_BYTES] = { 0 }; - /* Validate digest size. */ - if (digest_size <= 0 || - (unsigned) digest_size > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)) + // Unlike the state types, the parameters share a single (client-friendly) + // structure. + if (py_blake2_validate_params(self, + digest_size, + key, salt, person, + fanout, depth, leaf_size, + node_offset, node_depth, inner_size) < 0) { - PyErr_Format(PyExc_ValueError, - "digest_size for %s must be between 1 and %d bytes, here it is %d", - is_blake2b(self->impl) ? "Blake2b" : "Blake2s", - is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES, - digest_size); - goto error; - } - - /* Validate salt parameter. */ - if ((salt->obj != NULL) && salt->len) { - if ((size_t)salt->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)) { - PyErr_Format(PyExc_ValueError, - "maximum salt length is %d bytes", - (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)); - goto error; - } - memcpy(salt_, salt->buf, salt->len); - } - - /* Validate personalization parameter. */ - if ((person->obj != NULL) && person->len) { - if ((size_t)person->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)) { - PyErr_Format(PyExc_ValueError, - "maximum person length is %d bytes", - (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)); - goto error; - } - memcpy(personal_, person->buf, person->len); - } - - /* Validate tree parameters. */ - if (fanout < 0 || fanout > 255) { - PyErr_SetString(PyExc_ValueError, - "fanout must be between 0 and 255"); - goto error; - } - - if (depth <= 0 || depth > 255) { - PyErr_SetString(PyExc_ValueError, - "depth must be between 1 and 255"); - goto error; - } - - if (leaf_size > 0xFFFFFFFFU) { - PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); - goto error; - } - - if (is_blake2s(self->impl) && node_offset > 0xFFFFFFFFFFFFULL) { - /* maximum 2**48 - 1 */ - PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); - goto error; - } - - if (node_depth < 0 || node_depth > 255) { - PyErr_SetString(PyExc_ValueError, - "node_depth must be between 0 and 255"); goto error; } - if (inner_size < 0 || - (unsigned) inner_size > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)) { - PyErr_Format(PyExc_ValueError, - "inner_size must be between 0 and is %d", - (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)); - goto error; + // Using Blake2b because we statically know that these are greater than the + // Blake2s sizes -- this avoids a VLA. + uint8_t salt_buffer[HACL_HASH_BLAKE2B_SALT_BYTES] = {0}; + uint8_t personal_buffer[HACL_HASH_BLAKE2B_PERSONAL_BYTES] = {0}; + if (salt->obj != NULL) { + assert(salt->buf != NULL); + memcpy(salt_buffer, salt->buf, salt->len); } - - /* Set key length. */ - if ((key->obj != NULL) && key->len) { - if ((size_t)key->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)) { - PyErr_Format(PyExc_ValueError, - "maximum key length is %d bytes", - (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)); - goto error; - } + if (person->obj != NULL) { + assert(person->buf != NULL); + memcpy(personal_buffer, person->buf, person->len); } - // Unlike the state types, the parameters share a single (client-friendly) - // structure. - Hacl_Hash_Blake2b_blake2_params params = { .digest_length = digest_size, .key_length = (uint8_t)key->len, @@ -586,55 +606,46 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, .node_offset = node_offset, .node_depth = node_depth, .inner_length = inner_size, - .salt = salt_, - .personal = personal_ + .salt = salt_buffer, + .personal = personal_buffer }; +#define BLAKE2_MALLOC(TYPE, STATE) \ + do { \ + STATE = Hacl_Hash_ ## TYPE ## _malloc_with_params_and_key( \ + ¶ms, last_node, key->buf); \ + if (STATE == NULL) { \ + (void)PyErr_NoMemory(); \ + goto error; \ + } \ + } while (0) + switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 - case Blake2b_256: { - self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(¶ms, last_node, key->buf); - if (self->blake2b_256_state == NULL) { - (void)PyErr_NoMemory(); - goto error; - } + case Blake2b_256: + BLAKE2_MALLOC(Blake2b_Simd256, self->blake2b_256_state); break; - } #endif #if HACL_CAN_COMPILE_SIMD128 - case Blake2s_128: { - self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(¶ms, last_node, key->buf); - if (self->blake2s_128_state == NULL) { - (void)PyErr_NoMemory(); - goto error; - } + case Blake2s_128: + BLAKE2_MALLOC(Blake2s_Simd128, self->blake2s_128_state); break; - } #endif - case Blake2b: { - self->blake2b_state = Hacl_Hash_Blake2b_malloc_with_params_and_key(¶ms, last_node, key->buf); - if (self->blake2b_state == NULL) { - (void)PyErr_NoMemory(); - goto error; - } + case Blake2b: + BLAKE2_MALLOC(Blake2b, self->blake2b_state); break; - } - case Blake2s: { - self->blake2s_state = Hacl_Hash_Blake2s_malloc_with_params_and_key(¶ms, last_node, key->buf); - if (self->blake2s_state == NULL) { - (void)PyErr_NoMemory(); - goto error; - } + case Blake2s: + BLAKE2_MALLOC(Blake2s, self->blake2s_state); break; - } default: Py_UNREACHABLE(); } +#undef BLAKE2_MALLOC /* Process initial data if any. */ if (data != NULL) { + Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { Py_BEGIN_ALLOW_THREADS update(self, buf.buf, buf.len); @@ -687,7 +698,9 @@ py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { return NULL; } - return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return py_blake2_new(type, data, digest_size, key, salt, person, + fanout, depth, leaf_size, node_offset, node_depth, + inner_size, last_node, usedforsecurity); } /*[clinic input] @@ -725,49 +738,44 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { return NULL; } - return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return py_blake2_new(type, data, digest_size, key, salt, person, + fanout, depth, leaf_size, node_offset, node_depth, + inner_size, last_node, usedforsecurity); } static int blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); +#define BLAKE2_COPY(TYPE, STATE_ATTR) \ + do { \ + cpy->STATE_ATTR = Hacl_Hash_ ## TYPE ## _copy(self->STATE_ATTR); \ + if (cpy->STATE_ATTR == NULL) { \ + goto error; \ + } \ + } while (0) + switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 - case Blake2b_256: { - cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state); - if (cpy->blake2b_256_state == NULL) { - goto error; - } + case Blake2b_256: + BLAKE2_COPY(Blake2b_Simd256, blake2b_256_state); break; - } #endif #if HACL_CAN_COMPILE_SIMD128 - case Blake2s_128: { - cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state); - if (cpy->blake2s_128_state == NULL) { - goto error; - } + case Blake2s_128: + BLAKE2_COPY(Blake2s_Simd128, blake2s_128_state); break; - } #endif - case Blake2b: { - cpy->blake2b_state = Hacl_Hash_Blake2b_copy(self->blake2b_state); - if (cpy->blake2b_state == NULL) { - goto error; - } + case Blake2b: + BLAKE2_COPY(Blake2b, blake2b_state); break; - } - case Blake2s: { - cpy->blake2s_state = Hacl_Hash_Blake2s_copy(self->blake2s_state); - if (cpy->blake2s_state == NULL) { - goto error; - } + case Blake2s: + BLAKE2_COPY(Blake2s, blake2s_state); break; - } default: Py_UNREACHABLE(); } +#undef BLAKE2_COPY cpy->impl = self->impl; return 0; @@ -829,7 +837,8 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) update(self, buf.buf, buf.len); PyMutex_Unlock(&self->mutex); Py_END_ALLOW_THREADS - } else { + } + else { update(self, buf.buf, buf.len); } @@ -838,40 +847,42 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) Py_RETURN_NONE; } -/*[clinic input] -_blake2.blake2b.digest - -Return the digest value as a bytes object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2b_digest_impl(Blake2Object *self) -/*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ +static uint8_t +blake2_blake2b_compute_digest(Blake2Object *self, uint8_t *digest) { - uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - - ENTER_HASHLIB(self); - uint8_t digest_length = 0; switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: - digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); - break; + return Hacl_Hash_Blake2b_Simd256_digest( + self->blake2b_256_state, digest); #endif #if HACL_CAN_COMPILE_SIMD128 case Blake2s_128: - digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); - break; + return Hacl_Hash_Blake2s_Simd128_digest( + self->blake2s_128_state, digest); #endif case Blake2b: - digest_length = Hacl_Hash_Blake2b_digest(self->blake2b_state, digest); - break; + return Hacl_Hash_Blake2b_digest(self->blake2b_state, digest); case Blake2s: - digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); - break; + return Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); default: Py_UNREACHABLE(); } +} + +/*[clinic input] +_blake2.blake2b.digest + +Return the digest value as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_blake2_blake2b_digest_impl(Blake2Object *self) +/*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ +{ + uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; + ENTER_HASHLIB(self); + digest_length = blake2_blake2b_compute_digest(self, digest); LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, digest_length); } @@ -886,30 +897,9 @@ static PyObject * _blake2_blake2b_hexdigest_impl(Blake2Object *self) /*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/ { - uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - + uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; ENTER_HASHLIB(self); - uint8_t digest_length = 0; - switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 - case Blake2b_256: - digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); - break; -#endif -#if HACL_CAN_COMPILE_SIMD128 - case Blake2s_128: - digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); - break; -#endif - case Blake2b: - digest_length = Hacl_Hash_Blake2b_digest(self->blake2b_state, digest); - break; - case Blake2s: - digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); - break; - default: - Py_UNREACHABLE(); - } + digest_length = blake2_blake2b_compute_digest(self, digest); LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, digest_length); } @@ -928,43 +918,49 @@ static PyObject * py_blake2b_get_name(PyObject *op, void *Py_UNUSED(closure)) { Blake2Object *self = _Blake2Object_CAST(op); - return PyUnicode_FromString(is_blake2b(self->impl) ? "blake2b" : "blake2s"); + return PyUnicode_FromString(BLAKE2_IMPLNAME(self)); } - static PyObject * py_blake2b_get_block_size(PyObject *op, void *Py_UNUSED(closure)) { Blake2Object *self = _Blake2Object_CAST(op); - return PyLong_FromLong(is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_BLOCK_BYTES : HACL_HASH_BLAKE2S_BLOCK_BYTES); + return PyLong_FromLong(GET_BLAKE2_CONST(self, BLOCK_BYTES)); } - -static PyObject * -py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) +static Hacl_Hash_Blake2b_index +hacl_get_blake2_info(Blake2Object *self) { - Blake2Object *self = _Blake2Object_CAST(op); switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: - return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length); + return Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state); #endif #if HACL_CAN_COMPILE_SIMD128 case Blake2s_128: - return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length); + return Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state); #endif case Blake2b: - return PyLong_FromLong(Hacl_Hash_Blake2b_info(self->blake2b_state).digest_length); + return Hacl_Hash_Blake2b_info(self->blake2b_state); case Blake2s: - return PyLong_FromLong(Hacl_Hash_Blake2s_info(self->blake2s_state).digest_length); + return Hacl_Hash_Blake2s_info(self->blake2s_state); default: Py_UNREACHABLE(); } } +static PyObject * +py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) +{ + Blake2Object *self = _Blake2Object_CAST(op); + Hacl_Hash_Blake2b_index info = hacl_get_blake2_info(self); + return PyLong_FromLong(info.digest_length); +} + + static PyGetSetDef py_blake2b_getsetters[] = { {"name", py_blake2b_get_name, NULL, NULL, NULL}, {"block_size", py_blake2b_get_block_size, NULL, NULL, NULL}, @@ -981,38 +977,35 @@ py_blake2_clear(PyObject *op) // initializes the HACL* internal state to NULL before allocating // it. If an error occurs in the constructor, we should only free // states that were allocated (i.e. that are not NULL). +#define BLAKE2_FREE(TYPE, STATE) \ + do { \ + if (STATE != NULL) { \ + Hacl_Hash_ ## TYPE ## _free(STATE); \ + STATE = NULL; \ + } \ + } while (0) + switch (self->impl) { #if HACL_CAN_COMPILE_SIMD256 case Blake2b_256: - if (self->blake2b_256_state != NULL) { - Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state); - self->blake2b_256_state = NULL; - } + BLAKE2_FREE(Blake2b_Simd256, self->blake2b_256_state); break; #endif #if HACL_CAN_COMPILE_SIMD128 case Blake2s_128: - if (self->blake2s_128_state != NULL) { - Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state); - self->blake2s_128_state = NULL; - } + BLAKE2_FREE(Blake2s_Simd128, self->blake2s_128_state); break; #endif case Blake2b: - if (self->blake2b_state != NULL) { - Hacl_Hash_Blake2b_free(self->blake2b_state); - self->blake2b_state = NULL; - } + BLAKE2_FREE(Blake2b, self->blake2b_state); break; case Blake2s: - if (self->blake2s_state != NULL) { - Hacl_Hash_Blake2s_free(self->blake2s_state); - self->blake2s_state = NULL; - } + BLAKE2_FREE(Blake2s, self->blake2s_state); break; default: Py_UNREACHABLE(); } +#undef BLAKE2_FREE return 0; } @@ -1041,7 +1034,7 @@ static PyType_Slot blake2b_type_slots[] = { {Py_tp_methods, py_blake2b_methods}, {Py_tp_getset, py_blake2b_getsetters}, {Py_tp_new, py_blake2b_new}, - {0,0} + {0, 0} }; static PyType_Slot blake2s_type_slots[] = { @@ -1054,12 +1047,12 @@ static PyType_Slot blake2s_type_slots[] = { // only the constructor differs, so that it can receive a clinic-generated // default digest length suitable for blake2s {Py_tp_new, py_blake2s_new}, - {0,0} + {0, 0} }; static PyType_Spec blake2b_type_spec = { .name = "_blake2.blake2b", - .basicsize = sizeof(Blake2Object), + .basicsize = sizeof(Blake2Object), .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HEAPTYPE, .slots = blake2b_type_slots @@ -1067,7 +1060,7 @@ static PyType_Spec blake2b_type_spec = { static PyType_Spec blake2s_type_spec = { .name = "_blake2.blake2s", - .basicsize = sizeof(Blake2Object), + .basicsize = sizeof(Blake2Object), .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HEAPTYPE, .slots = blake2s_type_slots diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index c7b49d4dee3d0a..b404d5732ec857 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -1715,11 +1715,11 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); #elif defined(_M_X64) - int info1[4] = { 0 }; + int info1[4] = {0}; __cpuidex(info1, 1, 0); eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - int info7[4] = { 0 }; + int info7[4] = {0}; __cpuidex(info7, 7, 0); eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; #endif From 8441b263af964f353bf02d56c32a4fc547cdc330 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 9 Jun 2025 04:56:32 -0400 Subject: [PATCH 518/989] bpo-45210: Document that error indicator may be set in tp_dealloc (#28358) Signed-off-by: Edward Z. Yang Signed-off-by: Edward Z. Yang Co-authored-by: Victor Stinner --- Doc/c-api/typeobj.rst | 20 +++++++++++++++++++ .../2021-09-15-13-07-25.bpo-45210.RtGk7i.rst | 2 ++ 2 files changed, 22 insertions(+) create mode 100644 Misc/NEWS.d/next/Documentation/2021-09-15-13-07-25.bpo-45210.RtGk7i.rst diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 91046c0e6f18ae..af2bead3bb5004 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -686,6 +686,26 @@ and :c:data:`PyType_Type` effectively act as defaults.) instance, and call the type's :c:member:`~PyTypeObject.tp_free` function to free the object itself. + If you may call functions that may set the error indicator, you must use + :c:func:`PyErr_GetRaisedException` and :c:func:`PyErr_SetRaisedException` + to ensure you don't clobber a preexisting error indicator (the deallocation + could have occurred while processing a different error): + + .. code-block:: c + + static void + foo_dealloc(foo_object *self) + { + PyObject *et, *ev, *etb; + PyObject *exc = PyErr_GetRaisedException(); + ... + PyErr_SetRaisedException(exc); + } + + The dealloc handler itself must not raise an exception; if it hits an error + case it should call :c:func:`PyErr_FormatUnraisable` to log (and clear) an + unraisable exception. + No guarantees are made about when an object is destroyed, except: * Python will destroy an object immediately or some time after the final diff --git a/Misc/NEWS.d/next/Documentation/2021-09-15-13-07-25.bpo-45210.RtGk7i.rst b/Misc/NEWS.d/next/Documentation/2021-09-15-13-07-25.bpo-45210.RtGk7i.rst new file mode 100644 index 00000000000000..ce3eba154ba6aa --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2021-09-15-13-07-25.bpo-45210.RtGk7i.rst @@ -0,0 +1,2 @@ +Document that error indicator may be set in tp_dealloc, and how to avoid +clobbering it. From c19e36cc4eabacaacc359e8b2550b10c2965a31a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:10:32 +0200 Subject: [PATCH 519/989] gh-131316: fix invalid `DECREF` in `_md5.md5.copy()` (#135291) This amends commit 261633bd3f48607478f50d12d8025cd4bb36f6f4. --- Modules/md5module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 9b5ea2d6e02605..08dbcd2cbce844 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -120,7 +120,7 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); LEAVE_HASHLIB(self); if (newobj->hash_state == NULL) { - Py_DECREF(self); + Py_DECREF(newobj); return PyErr_NoMemory(); } return (PyObject *)newobj; From b150b6aca7b17efe1bb13c3058d61cdefb83237e Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 9 Jun 2025 13:33:18 +0300 Subject: [PATCH 520/989] gh-131798: Optimize `_UNARY_INVERT` (GH-135222) --- Lib/test/test_capi/test_opt.py | 15 +++++++++++++++ ...2025-06-06-01-09-44.gh-issue-131798.1SuxO9.rst | 1 + Python/optimizer_bytecodes.c | 9 +++++++++ Python/optimizer_cases.c.h | 9 ++++++++- 4 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-01-09-44.gh-issue-131798.1SuxO9.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8a3819dabe44ce..bf22ef2a5922e7 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2275,6 +2275,21 @@ def f(n): self.assertIn("_UNPACK_SEQUENCE_TWO_TUPLE", uops) self.assertNotIn("_GUARD_TOS_TUPLE", uops) + def test_unary_invert_long_type(self): + def testfunc(n): + for _ in range(n): + a = 9397 + x = ~a + ~a + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertNotIn("_GUARD_TOS_INT", uops) + self.assertNotIn("_GUARD_NOS_INT", uops) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-01-09-44.gh-issue-131798.1SuxO9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-01-09-44.gh-issue-131798.1SuxO9.rst new file mode 100644 index 00000000000000..a377526230659f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-01-09-44.gh-issue-131798.1SuxO9.rst @@ -0,0 +1 @@ +Optimize ``_UNARY_INVERT`` in JIT-compiled code. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index fbf4dfd3db629c..a9d5e92ca02a59 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -467,6 +467,15 @@ dummy_func(void) { res = sym_new_truthiness(ctx, value, false); } + op(_UNARY_INVERT, (value -- res)) { + if (sym_matches_type(value, &PyLong_Type)) { + res = sym_new_type(ctx, &PyLong_Type); + } + else { + res = sym_new_not_null(ctx); + } + } + op(_COMPARE_OP, (left, right -- res)) { if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b42f47c75eaf50..4780e492f61d74 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -285,8 +285,15 @@ } case _UNARY_INVERT: { + JitOptSymbol *value; JitOptSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (sym_matches_type(value, &PyLong_Type)) { + res = sym_new_type(ctx, &PyLong_Type); + } + else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } From 0045100ccbc3919e8990fa59bc413fe38d21b075 Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Mon, 9 Jun 2025 13:52:38 +0200 Subject: [PATCH 521/989] GH-135287: clang-cl PGO builds on Windows fail with `could not open '/GENPROFILE'` (GH-135289) --- PCbuild/pyproject.props | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index ce51e342241686..cf35e705f355a7 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -100,8 +100,8 @@ advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;%(AdditionalDependencies) /OPT:REF,NOICF %(AdditionalOptions) -d2:-pattern-opt-disable:-932189325 %(AdditionalOptions) - /GENPROFILE %(AdditionalOptions) - /USEPROFILE %(AdditionalOptions) + /GENPROFILE %(AdditionalOptions) + /USEPROFILE %(AdditionalOptions) false From aaad2e81cecae01d3e7e12b2bbd2e1a8533ff5d4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 9 Jun 2025 14:47:02 +0200 Subject: [PATCH 522/989] gh-133968: Update PyUnicodeWriter_WriteASCII() documentation (#135297) The function was added to Python 3.14. --- Doc/c-api/unicode.rst | 2 +- Doc/whatsnew/3.15.rst | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 07fdcfd9729ee0..84fee05cb4ce20 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1827,7 +1827,7 @@ object. On success, return ``0``. On error, set an exception, leave the writer unchanged, and return ``-1``. - .. versionadded:: next + .. versionadded:: 3.14 .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e5bccf17eb63e2..88e7462f688e70 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -298,12 +298,6 @@ New features functions as replacements for :c:func:`PySys_GetObject`. (Contributed by Serhiy Storchaka in :gh:`108512`.) -* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string - into a :c:type:`PyUnicodeWriter`. The function is faster than - :c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the - input string contains non-ASCII characters. - (Contributed by Victor Stinner in :gh:`133968`.) - * Add :c:type:`PyUnstable_Unicode_GET_CACHED_HASH` to get the cached hash of a string. See the documentation for caveats. (Contributed by Petr Viktorin in :gh:`131510`) From f90483e13ad48227767cd6d69bc4857843710a5c Mon Sep 17 00:00:00 2001 From: Wulian233 <1055917385@qq.com> Date: Mon, 9 Jun 2025 21:40:09 +0800 Subject: [PATCH 523/989] gh-135001: Explicitly specify the encoding parameter value of `calendar.HTMLCalendar` as 'utf-8' (#135002) --- Doc/library/calendar.rst | 2 +- Lib/calendar.py | 6 +++--- Lib/test/test_calendar.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/calendar.rst b/Doc/library/calendar.rst index 39090e36ed9c0d..b292d828841f2f 100644 --- a/Doc/library/calendar.rst +++ b/Doc/library/calendar.rst @@ -251,7 +251,7 @@ interpreted as prescribed by the ISO 8601 standard. Year 0 is 1 BC, year -1 is 3) specifies the number of months per row. *css* is the name for the cascading style sheet to be used. :const:`None` can be passed if no style sheet should be used. *encoding* specifies the encoding to be used for the - output (defaulting to the system default encoding). + output (defaulting to ``'utf-8'``). .. method:: formatmonthname(theyear, themonth, withyear=True) diff --git a/Lib/calendar.py b/Lib/calendar.py index 18f76d52ff8581..3be1b50500eb07 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -565,7 +565,7 @@ def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): Return a formatted year as a complete HTML page. """ if encoding is None: - encoding = sys.getdefaultencoding() + encoding = 'utf-8' v = [] a = v.append a('\n' % encoding) @@ -846,7 +846,7 @@ def main(args=None): parser.add_argument( "-e", "--encoding", default=None, - help="encoding to use for output" + help="encoding to use for output (default utf-8)" ) parser.add_argument( "-t", "--type", @@ -890,7 +890,7 @@ def main(args=None): cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: - encoding = sys.getdefaultencoding() + encoding = 'utf-8' optdict = dict(encoding=encoding, css=options.css) write = sys.stdout.buffer.write if options.year is None: diff --git a/Lib/test/test_calendar.py b/Lib/test/test_calendar.py index 7ade4271b7a156..bc39c86b8cf62d 100644 --- a/Lib/test/test_calendar.py +++ b/Lib/test/test_calendar.py @@ -417,7 +417,7 @@ def test_output_htmlcalendar_encoding_utf8(self): self.check_htmlcalendar_encoding('utf-8', 'utf-8') def test_output_htmlcalendar_encoding_default(self): - self.check_htmlcalendar_encoding(None, sys.getdefaultencoding()) + self.check_htmlcalendar_encoding(None, 'utf-8') def test_yeardatescalendar(self): def shrink(cal): From 28d91d06f13ceda3df0cefacec497c370eff4802 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 9 Jun 2025 15:50:11 +0200 Subject: [PATCH 524/989] gh-127833: Reword and expand the Notation section (GH-134443) Prepare the docs for using the notation used in the `python.gram` file. If we want to sync the two, the meta-syntax should be the same. Link the Full Grammar docs here; keep only a few extras. Also, remove the distinction between lexical and syntactic rules, except for whitespace handling. With f- and t-strings, the line between the two is blurry. Co-authored-by: Blaise Pabon Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Lysandros Nikolaou Co-authored-by: Colin Marquardt --- Doc/reference/grammar.rst | 18 ++-- Doc/reference/introduction.rst | 160 ++++++++++++++++++++++++--------- 2 files changed, 128 insertions(+), 50 deletions(-) diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index b9cca4444c9141..55c148801d8559 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -8,15 +8,15 @@ used to generate the CPython parser (see :source:`Grammar/python.gram`). The version here omits details related to code generation and error recovery. -The notation is a mixture of `EBNF -`_ -and `PEG `_. -In particular, ``&`` followed by a symbol, token or parenthesized -group indicates a positive lookahead (i.e., is required to match but -not consumed), while ``!`` indicates a negative lookahead (i.e., is -required *not* to match). We use the ``|`` separator to mean PEG's -"ordered choice" (written as ``/`` in traditional PEG grammars). See -:pep:`617` for more details on the grammar's syntax. +The notation used here is the same as in the preceding docs, +and is described in the :ref:`notation ` section, +except for a few extra complications: + +* ``&e``: a positive lookahead (that is, ``e`` is required to match but + not consumed) +* ``!e``: a negative lookahead (that is, ``e`` is required *not* to match) +* ``~`` ("cut"): commit to the current alternative and fail the rule + even if this fails to parse .. literalinclude:: ../../Grammar/python.gram :language: peg diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst index b7b70e6be5a5b7..444acac374a690 100644 --- a/Doc/reference/introduction.rst +++ b/Doc/reference/introduction.rst @@ -90,44 +90,122 @@ Notation .. index:: BNF, grammar, syntax, notation -The descriptions of lexical analysis and syntax use a modified -`Backus–Naur form (BNF) `_ grammar -notation. This uses the following style of definition: - -.. productionlist:: notation - name: `lc_letter` (`lc_letter` | "_")* - lc_letter: "a"..."z" - -The first line says that a ``name`` is an ``lc_letter`` followed by a sequence -of zero or more ``lc_letter``\ s and underscores. An ``lc_letter`` in turn is -any of the single characters ``'a'`` through ``'z'``. (This rule is actually -adhered to for the names defined in lexical and grammar rules in this document.) - -Each rule begins with a name (which is the name defined by the rule) and -``::=``. A vertical bar (``|``) is used to separate alternatives; it is the -least binding operator in this notation. A star (``*``) means zero or more -repetitions of the preceding item; likewise, a plus (``+``) means one or more -repetitions, and a phrase enclosed in square brackets (``[ ]``) means zero or -one occurrences (in other words, the enclosed phrase is optional). The ``*`` -and ``+`` operators bind as tightly as possible; parentheses are used for -grouping. Literal strings are enclosed in quotes. White space is only -meaningful to separate tokens. Rules are normally contained on a single line; -rules with many alternatives may be formatted alternatively with each line after -the first beginning with a vertical bar. - -.. index:: lexical definitions, ASCII - -In lexical definitions (as the example above), two more conventions are used: -Two literal characters separated by three dots mean a choice of any single -character in the given (inclusive) range of ASCII characters. A phrase between -angular brackets (``<...>``) gives an informal description of the symbol -defined; e.g., this could be used to describe the notion of 'control character' -if needed. - -Even though the notation used is almost the same, there is a big difference -between the meaning of lexical and syntactic definitions: a lexical definition -operates on the individual characters of the input source, while a syntax -definition operates on the stream of tokens generated by the lexical analysis. -All uses of BNF in the next chapter ("Lexical Analysis") are lexical -definitions; uses in subsequent chapters are syntactic definitions. - +The descriptions of lexical analysis and syntax use a grammar notation that +is a mixture of +`EBNF `_ +and `PEG `_. +For example: + +.. grammar-snippet:: + :group: notation + + name: `letter` (`letter` | `digit` | "_")* + letter: "a"..."z" | "A"..."Z" + digit: "0"..."9" + +In this example, the first line says that a ``name`` is a ``letter`` followed +by a sequence of zero or more ``letter``\ s, ``digit``\ s, and underscores. +A ``letter`` in turn is any of the single characters ``'a'`` through +``'z'`` and ``A`` through ``Z``; a ``digit`` is a single character from ``0`` +to ``9``. + +Each rule begins with a name (which identifies the rule that's being defined) +followed by a colon, ``:``. +The definition to the right of the colon uses the following syntax elements: + +* ``name``: A name refers to another rule. + Where possible, it is a link to the rule's definition. + + * ``TOKEN``: An uppercase name refers to a :term:`token`. + For the purposes of grammar definitions, tokens are the same as rules. + +* ``"text"``, ``'text'``: Text in single or double quotes must match literally + (without the quotes). The type of quote is chosen according to the meaning + of ``text``: + + * ``'if'``: A name in single quotes denotes a :ref:`keyword `. + * ``"case"``: A name in double quotes denotes a + :ref:`soft-keyword `. + * ``'@'``: A non-letter symbol in single quotes denotes an + :py:data:`~token.OP` token, that is, a :ref:`delimiter ` or + :ref:`operator `. + +* ``e1 e2``: Items separated only by whitespace denote a sequence. + Here, ``e1`` must be followed by ``e2``. +* ``e1 | e2``: A vertical bar is used to separate alternatives. + It denotes PEG's "ordered choice": if ``e1`` matches, ``e2`` is + not considered. + In traditional PEG grammars, this is written as a slash, ``/``, rather than + a vertical bar. + See :pep:`617` for more background and details. +* ``e*``: A star means zero or more repetitions of the preceding item. +* ``e+``: Likewise, a plus means one or more repetitions. +* ``[e]``: A phrase enclosed in square brackets means zero or + one occurrences. In other words, the enclosed phrase is optional. +* ``e?``: A question mark has exactly the same meaning as square brackets: + the preceding item is optional. +* ``(e)``: Parentheses are used for grouping. +* ``"a"..."z"``: Two literal characters separated by three dots mean a choice + of any single character in the given (inclusive) range of ASCII characters. + This notation is only used in + :ref:`lexical definitions `. +* ``<...>``: A phrase between angular brackets gives an informal description + of the matched symbol (for example, ````), + or an abbreviation that is defined in nearby text (for example, ````). + This notation is only used in + :ref:`lexical definitions `. + +The unary operators (``*``, ``+``, ``?``) bind as tightly as possible; +the vertical bar (``|``) binds most loosely. + +White space is only meaningful to separate tokens. + +Rules are normally contained on a single line, but rules that are too long +may be wrapped: + +.. grammar-snippet:: + :group: notation + + literal: stringliteral | bytesliteral + | integer | floatnumber | imagnumber + +Alternatively, rules may be formatted with the first line ending at the colon, +and each alternative beginning with a vertical bar on a new line. +For example: + + +.. grammar-snippet:: + :group: notation-alt + + literal: + | stringliteral + | bytesliteral + | integer + | floatnumber + | imagnumber + +This does *not* mean that there is an empty first alternative. + +.. index:: lexical definitions + +.. _notation-lexical-vs-syntactic: + +Lexical and Syntactic definitions +--------------------------------- + +There is some difference between *lexical* and *syntactic* analysis: +the :term:`lexical analyzer` operates on the individual characters of the +input source, while the *parser* (syntactic analyzer) operates on the stream +of :term:`tokens ` generated by the lexical analysis. +However, in some cases the exact boundary between the two phases is a +CPython implementation detail. + +The practical difference between the two is that in *lexical* definitions, +all whitespace is significant. +The lexical analyzer :ref:`discards ` all whitespace that is not +converted to tokens like :data:`token.INDENT` or :data:`~token.NEWLINE`. +*Syntactic* definitions then use these tokens, rather than source characters. + +This documentation uses the same BNF grammar for both styles of definitions. +All uses of BNF in the next chapter (:ref:`lexical`) are lexical definitions; +uses in subsequent chapters are syntactic definitions. From cc8e6d27031dcf2582bd287b14ab9b6e200d5e92 Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:38:41 +0200 Subject: [PATCH 525/989] GH-131296: Suppress clang-cl warnings in socketmodule.c (GH-131821) --- Modules/socketmodule.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 92c9aa8b510dca..85c72779bac607 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -716,12 +716,6 @@ select_error(void) # define SOCK_INPROGRESS_ERR EINPROGRESS #endif -#ifdef _MSC_VER -# define SUPPRESS_DEPRECATED_CALL __pragma(warning(suppress: 4996)) -#else -# define SUPPRESS_DEPRECATED_CALL -#endif - /* Convenience function to raise an error according to errno and return a NULL pointer from a function. */ @@ -3366,7 +3360,7 @@ sock_setsockopt(PyObject *self, PyObject *args) &level, &optname, &flag)) { #ifdef MS_WINDOWS if (optname == SIO_TCP_SET_ACK_FREQUENCY) { - int dummy; + DWORD dummy; res = WSAIoctl(get_sock_fd(s), SIO_TCP_SET_ACK_FREQUENCY, &flag, sizeof(flag), NULL, 0, &dummy, NULL, NULL); if (res >= 0) { @@ -6195,8 +6189,10 @@ socket_gethostbyname_ex(PyObject *self, PyObject *args) #ifdef USE_GETHOSTBYNAME_LOCK PyThread_acquire_lock(netdb_lock, 1); #endif - SUPPRESS_DEPRECATED_CALL + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS h = gethostbyname(name); + _Py_COMP_DIAG_POP #endif /* HAVE_GETHOSTBYNAME_R */ Py_END_ALLOW_THREADS /* Some C libraries would require addr.__ss_family instead of @@ -6300,8 +6296,10 @@ socket_gethostbyaddr(PyObject *self, PyObject *args) #ifdef USE_GETHOSTBYNAME_LOCK PyThread_acquire_lock(netdb_lock, 1); #endif - SUPPRESS_DEPRECATED_CALL + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS h = gethostbyaddr(ap, al, af); + _Py_COMP_DIAG_POP #endif /* HAVE_GETHOSTBYNAME_R */ Py_END_ALLOW_THREADS ret = gethost_common(state, h, SAS2SA(&addr), sizeof(addr), af); @@ -6718,8 +6716,10 @@ _socket_inet_aton_impl(PyObject *module, const char *ip_addr) packed_addr = INADDR_BROADCAST; } else { - SUPPRESS_DEPRECATED_CALL + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS packed_addr = inet_addr(ip_addr); + _Py_COMP_DIAG_POP if (packed_addr == INADDR_NONE) { /* invalid address */ PyErr_SetString(PyExc_OSError, @@ -6762,8 +6762,10 @@ _socket_inet_ntoa_impl(PyObject *module, Py_buffer *packed_ip) memcpy(&packed_addr, packed_ip->buf, packed_ip->len); PyBuffer_Release(packed_ip); - SUPPRESS_DEPRECATED_CALL + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS return PyUnicode_FromString(inet_ntoa(packed_addr)); + _Py_COMP_DIAG_POP } #endif // HAVE_INET_NTOA From a58026a5e3da9ca2d09ef51aa90fe217f9a975ec Mon Sep 17 00:00:00 2001 From: Alper Date: Mon, 9 Jun 2025 10:57:29 -0700 Subject: [PATCH 526/989] gh-116738: Make _heapq module thread-safe (#135036) Use critical sections to make heapq methods that update the heap thread-safe when the GIL is disabled. --------- Co-authored-by: mpage --- Lib/test/test_free_threading/test_heapq.py | 240 ++++++++++++++++++ ...-06-02-13-57-40.gh-issue-116738.ycJsL8.rst | 1 + Modules/_heapqmodule.c | 54 +++- Modules/clinic/_heapqmodule.c.h | 23 +- 4 files changed, 303 insertions(+), 15 deletions(-) create mode 100644 Lib/test/test_free_threading/test_heapq.py create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-13-57-40.gh-issue-116738.ycJsL8.rst diff --git a/Lib/test/test_free_threading/test_heapq.py b/Lib/test/test_free_threading/test_heapq.py new file mode 100644 index 00000000000000..f75fb264c8ac0f --- /dev/null +++ b/Lib/test/test_free_threading/test_heapq.py @@ -0,0 +1,240 @@ +import unittest + +import heapq + +from enum import Enum +from threading import Thread, Barrier +from random import shuffle, randint + +from test.support import threading_helper +from test import test_heapq + + +NTHREADS = 10 +OBJECT_COUNT = 5_000 + + +class Heap(Enum): + MIN = 1 + MAX = 2 + + +@threading_helper.requires_working_threading() +class TestHeapq(unittest.TestCase): + def setUp(self): + self.test_heapq = test_heapq.TestHeapPython() + + def test_racing_heapify(self): + heap = list(range(OBJECT_COUNT)) + shuffle(heap) + + self.run_concurrently( + worker_func=heapq.heapify, args=(heap,), nthreads=NTHREADS + ) + self.test_heapq.check_invariant(heap) + + def test_racing_heappush(self): + heap = [] + + def heappush_func(heap): + for item in reversed(range(OBJECT_COUNT)): + heapq.heappush(heap, item) + + self.run_concurrently( + worker_func=heappush_func, args=(heap,), nthreads=NTHREADS + ) + self.test_heapq.check_invariant(heap) + + def test_racing_heappop(self): + heap = self.create_heap(OBJECT_COUNT, Heap.MIN) + + # Each thread pops (OBJECT_COUNT / NTHREADS) items + self.assertEqual(OBJECT_COUNT % NTHREADS, 0) + per_thread_pop_count = OBJECT_COUNT // NTHREADS + + def heappop_func(heap, pop_count): + local_list = [] + for _ in range(pop_count): + item = heapq.heappop(heap) + local_list.append(item) + + # Each local list should be sorted + self.assertTrue(self.is_sorted_ascending(local_list)) + + self.run_concurrently( + worker_func=heappop_func, + args=(heap, per_thread_pop_count), + nthreads=NTHREADS, + ) + self.assertEqual(len(heap), 0) + + def test_racing_heappushpop(self): + heap = self.create_heap(OBJECT_COUNT, Heap.MIN) + pushpop_items = self.create_random_list(-5_000, 10_000, OBJECT_COUNT) + + def heappushpop_func(heap, pushpop_items): + for item in pushpop_items: + popped_item = heapq.heappushpop(heap, item) + self.assertTrue(popped_item <= item) + + self.run_concurrently( + worker_func=heappushpop_func, + args=(heap, pushpop_items), + nthreads=NTHREADS, + ) + self.assertEqual(len(heap), OBJECT_COUNT) + self.test_heapq.check_invariant(heap) + + def test_racing_heapreplace(self): + heap = self.create_heap(OBJECT_COUNT, Heap.MIN) + replace_items = self.create_random_list(-5_000, 10_000, OBJECT_COUNT) + + def heapreplace_func(heap, replace_items): + for item in replace_items: + heapq.heapreplace(heap, item) + + self.run_concurrently( + worker_func=heapreplace_func, + args=(heap, replace_items), + nthreads=NTHREADS, + ) + self.assertEqual(len(heap), OBJECT_COUNT) + self.test_heapq.check_invariant(heap) + + def test_racing_heapify_max(self): + max_heap = list(range(OBJECT_COUNT)) + shuffle(max_heap) + + self.run_concurrently( + worker_func=heapq.heapify_max, args=(max_heap,), nthreads=NTHREADS + ) + self.test_heapq.check_max_invariant(max_heap) + + def test_racing_heappush_max(self): + max_heap = [] + + def heappush_max_func(max_heap): + for item in range(OBJECT_COUNT): + heapq.heappush_max(max_heap, item) + + self.run_concurrently( + worker_func=heappush_max_func, args=(max_heap,), nthreads=NTHREADS + ) + self.test_heapq.check_max_invariant(max_heap) + + def test_racing_heappop_max(self): + max_heap = self.create_heap(OBJECT_COUNT, Heap.MAX) + + # Each thread pops (OBJECT_COUNT / NTHREADS) items + self.assertEqual(OBJECT_COUNT % NTHREADS, 0) + per_thread_pop_count = OBJECT_COUNT // NTHREADS + + def heappop_max_func(max_heap, pop_count): + local_list = [] + for _ in range(pop_count): + item = heapq.heappop_max(max_heap) + local_list.append(item) + + # Each local list should be sorted + self.assertTrue(self.is_sorted_descending(local_list)) + + self.run_concurrently( + worker_func=heappop_max_func, + args=(max_heap, per_thread_pop_count), + nthreads=NTHREADS, + ) + self.assertEqual(len(max_heap), 0) + + def test_racing_heappushpop_max(self): + max_heap = self.create_heap(OBJECT_COUNT, Heap.MAX) + pushpop_items = self.create_random_list(-5_000, 10_000, OBJECT_COUNT) + + def heappushpop_max_func(max_heap, pushpop_items): + for item in pushpop_items: + popped_item = heapq.heappushpop_max(max_heap, item) + self.assertTrue(popped_item >= item) + + self.run_concurrently( + worker_func=heappushpop_max_func, + args=(max_heap, pushpop_items), + nthreads=NTHREADS, + ) + self.assertEqual(len(max_heap), OBJECT_COUNT) + self.test_heapq.check_max_invariant(max_heap) + + def test_racing_heapreplace_max(self): + max_heap = self.create_heap(OBJECT_COUNT, Heap.MAX) + replace_items = self.create_random_list(-5_000, 10_000, OBJECT_COUNT) + + def heapreplace_max_func(max_heap, replace_items): + for item in replace_items: + heapq.heapreplace_max(max_heap, item) + + self.run_concurrently( + worker_func=heapreplace_max_func, + args=(max_heap, replace_items), + nthreads=NTHREADS, + ) + self.assertEqual(len(max_heap), OBJECT_COUNT) + self.test_heapq.check_max_invariant(max_heap) + + @staticmethod + def is_sorted_ascending(lst): + """ + Check if the list is sorted in ascending order (non-decreasing). + """ + return all(lst[i - 1] <= lst[i] for i in range(1, len(lst))) + + @staticmethod + def is_sorted_descending(lst): + """ + Check if the list is sorted in descending order (non-increasing). + """ + return all(lst[i - 1] >= lst[i] for i in range(1, len(lst))) + + @staticmethod + def create_heap(size, heap_kind): + """ + Create a min/max heap where elements are in the range (0, size - 1) and + shuffled before heapify. + """ + heap = list(range(OBJECT_COUNT)) + shuffle(heap) + if heap_kind == Heap.MIN: + heapq.heapify(heap) + else: + heapq.heapify_max(heap) + + return heap + + @staticmethod + def create_random_list(a, b, size): + """ + Create a list of random numbers between a and b (inclusive). + """ + return [randint(-a, b) for _ in range(size)] + + def run_concurrently(self, worker_func, args, nthreads): + """ + Run the worker function concurrently in multiple threads. + """ + barrier = Barrier(nthreads) + + def wrapper_func(*args): + # Wait for all threads to reach this point before proceeding. + barrier.wait() + worker_func(*args) + + with threading_helper.catch_threading_exception() as cm: + workers = ( + Thread(target=wrapper_func, args=args) for _ in range(nthreads) + ) + with threading_helper.start_threads(workers): + pass + + # Worker threads should not raise any exceptions + self.assertIsNone(cm.exc_value) + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-13-57-40.gh-issue-116738.ycJsL8.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-13-57-40.gh-issue-116738.ycJsL8.rst new file mode 100644 index 00000000000000..506eefdb21aa9a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-13-57-40.gh-issue-116738.ycJsL8.rst @@ -0,0 +1 @@ +Make methods in :mod:`heapq` thread-safe on the :term:`free threaded ` build. diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 095866eec7d75a..7784cdcd9ffa24 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -11,7 +11,7 @@ annotated by François Pinard, and converted to C by Raymond Hettinger. #endif #include "Python.h" -#include "pycore_list.h" // _PyList_ITEMS() +#include "pycore_list.h" // _PyList_ITEMS(), _PyList_AppendTakeRef() #include "clinic/_heapqmodule.c.h" @@ -117,6 +117,7 @@ siftup(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush heap: object(subclass_of='&PyList_Type') @@ -128,13 +129,22 @@ Push item onto heap, maintaining the heap invariant. static PyObject * _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=912c094f47663935 input=7c69611f3698aceb]*/ +/*[clinic end generated code: output=912c094f47663935 input=f7a4f03ef8d52e67]*/ { - if (PyList_Append(heap, item)) + if (item == NULL) { + PyErr_BadInternalCall(); return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { + return NULL; + } - if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) + if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) { return NULL; + } Py_RETURN_NONE; } @@ -171,6 +181,7 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heappop heap: object(subclass_of='&PyList_Type') @@ -181,7 +192,7 @@ Pop the smallest item off the heap, maintaining the heap invariant. static PyObject * _heapq_heappop_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=96dfe82d37d9af76 input=91487987a583c856]*/ +/*[clinic end generated code: output=96dfe82d37d9af76 input=ed396461b153dd51]*/ { return heappop_internal(heap, siftup); } @@ -207,6 +218,7 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec /*[clinic input] +@critical_section heap _heapq.heapreplace heap: object(subclass_of='&PyList_Type') @@ -226,12 +238,13 @@ this routine unless written as part of a conditional replacement: static PyObject * _heapq_heapreplace_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=82ea55be8fbe24b4 input=719202ac02ba10c8]*/ +/*[clinic end generated code: output=82ea55be8fbe24b4 input=9be1678b817ef1a9]*/ { return heapreplace_internal(heap, item, siftup); } /*[clinic input] +@critical_section heap _heapq.heappushpop heap: object(subclass_of='&PyList_Type') @@ -246,7 +259,7 @@ a separate call to heappop(). static PyObject * _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=67231dc98ed5774f input=5dc701f1eb4a4aa7]*/ +/*[clinic end generated code: output=67231dc98ed5774f input=db05c81b1dd92c44]*/ { PyObject *returnitem; int cmp; @@ -371,6 +384,7 @@ heapify_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heapify heap: object(subclass_of='&PyList_Type') @@ -381,7 +395,7 @@ Transform list into a heap, in-place, in O(len(heap)) time. static PyObject * _heapq_heapify_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=e63a636fcf83d6d0 input=53bb7a2166febb73]*/ +/*[clinic end generated code: output=e63a636fcf83d6d0 input=aaaaa028b9b6af08]*/ { return heapify_internal(heap, siftup); } @@ -481,6 +495,7 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush_max heap: object(subclass_of='&PyList_Type') @@ -492,9 +507,16 @@ Push item onto max heap, maintaining the heap invariant. static PyObject * _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=c869d5f9deb08277 input=4743d7db137b6e2b]*/ +/*[clinic end generated code: output=c869d5f9deb08277 input=c437e3d1ff8dcb70]*/ { - if (PyList_Append(heap, item)) { + if (item == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { return NULL; } @@ -506,6 +528,7 @@ _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) } /*[clinic input] +@critical_section heap _heapq.heappop_max heap: object(subclass_of='&PyList_Type') @@ -516,12 +539,13 @@ Maxheap variant of heappop. static PyObject * _heapq_heappop_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=2f051195ab404b77 input=e62b14016a5a26de]*/ +/*[clinic end generated code: output=2f051195ab404b77 input=5d70c997798aec64]*/ { return heappop_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapreplace_max heap: object(subclass_of='&PyList_Type') @@ -533,12 +557,13 @@ Maxheap variant of heapreplace. static PyObject * _heapq_heapreplace_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=8770778b5a9cbe9b input=21a3d28d757c881c]*/ +/*[clinic end generated code: output=8770778b5a9cbe9b input=fe70175356e4a649]*/ { return heapreplace_internal(heap, item, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapify_max heap: object(subclass_of='&PyList_Type') @@ -549,12 +574,13 @@ Maxheap variant of heapify. static PyObject * _heapq_heapify_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=8401af3856529807 input=edda4255728c431e]*/ +/*[clinic end generated code: output=8401af3856529807 input=4eee63231e7d1573]*/ { return heapify_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heappushpop_max heap: object(subclass_of='&PyList_Type') @@ -569,7 +595,7 @@ a separate call to heappop_max(). static PyObject * _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=ff0019f0941aca0d input=525a843013cbd6c0]*/ +/*[clinic end generated code: output=ff0019f0941aca0d input=24d0defa6fd6df4a]*/ { PyObject *returnitem; int cmp; diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 81d108627265ab..b43155b6c24e3c 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_heapq_heappush__doc__, @@ -32,7 +33,9 @@ _heapq_heappush(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -61,7 +64,9 @@ _heapq_heappop(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -103,7 +108,9 @@ _heapq_heapreplace(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -140,7 +147,9 @@ _heapq_heappushpop(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -169,7 +178,9 @@ _heapq_heapify(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -203,7 +214,9 @@ _heapq_heappush_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -232,7 +245,9 @@ _heapq_heappop_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -266,7 +281,9 @@ _heapq_heapreplace_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -295,7 +312,9 @@ _heapq_heapify_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -332,9 +351,11 @@ _heapq_heappushpop_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; } -/*[clinic end generated code: output=f55d8595ce150c76 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e83d50002c29a96d input=a9049054013a1b77]*/ From 2e1ad6eb26871a379e5d3aa626d6fc93eba72a86 Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Mon, 9 Jun 2025 19:03:55 -0500 Subject: [PATCH 527/989] Fix definition of `_Py_RemoteDebug_` symbols for static linking (#135146) --- Python/remote_debug.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Python/remote_debug.h b/Python/remote_debug.h index 0a817bdbd488e0..23fd3815e9d4e2 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -13,6 +13,16 @@ If you need to add a new function ensure that is declared 'static'. extern "C" { #endif +#ifdef __clang__ + #define UNUSED __attribute__((unused)) +#elif defined(__GNUC__) + #define UNUSED __attribute__((unused)) +#elif defined(_MSC_VER) + #define UNUSED __pragma(warning(suppress: 4505)) +#else + #define UNUSED +#endif + #if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # error "this header requires Py_BUILD_CORE or Py_BUILD_CORE_MODULE define" #endif @@ -133,7 +143,7 @@ _Py_RemoteDebug_FreePageCache(proc_handle_t *handle) } } -void +UNUSED static void _Py_RemoteDebug_ClearCache(proc_handle_t *handle) { for (int i = 0; i < MAX_PAGES; i++) { @@ -1059,7 +1069,7 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } -int +UNUSED static int _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, uintptr_t addr, size_t size, From 49fc1f215aeb0f71445505191ccb65517b58a5aa Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Tue, 10 Jun 2025 02:05:06 +0200 Subject: [PATCH 528/989] Fix warnings `set but not used [-Wunused-but-set-variable]` in remote_debug.h (#135290) --- Python/remote_debug.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Python/remote_debug.h b/Python/remote_debug.h index 23fd3815e9d4e2..8f9b6cd4c4960f 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -684,8 +684,6 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c } uintptr_t retval = 0; - int lines_processed = 0; - int matches_found = 0; while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { linelen = strlen(line); @@ -710,7 +708,6 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c line[linelen - 1] = '\0'; // and prepare to read the next line into the start of the buffer. linelen = 0; - lines_processed++; unsigned long start = 0; unsigned long path_pos = 0; @@ -731,7 +728,6 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c } if (strstr(filename, substr)) { - matches_found++; retval = search_elf_file_for_section(handle, secname, start, path); if (retval) { break; @@ -850,15 +846,10 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const MODULEENTRY32W moduleEntry; moduleEntry.dwSize = sizeof(moduleEntry); void* runtime_addr = NULL; - int modules_examined = 0; - int matches_found = 0; for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) { - modules_examined++; - // Look for either python executable or DLL if (wcsstr(moduleEntry.szModule, substr)) { - matches_found++; runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname); if (runtime_addr != NULL) { break; From 754e7c9b5187fcad22acf7555479603f173a4a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Jun 2025 02:08:30 +0200 Subject: [PATCH 529/989] gh-133157: remove usage of `_Py_NO_SANITIZE_UNDEFINED` in `Parser/pegen.c` (#134048) --- Parser/parser.c | 106 +++++++++++------------ Parser/pegen.c | 66 ++++++-------- Parser/pegen.h | 7 +- Tools/peg_generator/pegen/c_generator.py | 58 ++++++++----- 4 files changed, 121 insertions(+), 116 deletions(-) diff --git a/Parser/parser.c b/Parser/parser.c index 82311b4f40eebf..ee0aeb4e1875cb 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -1679,7 +1679,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('import' | 'from') import_stmt")); stmt_ty import_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_5_rule, p) + _PyPegen_lookahead(1, _tmp_5_rule, p) && (import_stmt_var = import_stmt_rule(p)) // import_stmt ) @@ -1917,7 +1917,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('def' | '@' | 'async') function_def")); stmt_ty function_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_6_rule, p) + _PyPegen_lookahead(1, _tmp_6_rule, p) && (function_def_var = function_def_rule(p)) // function_def ) @@ -1959,7 +1959,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('class' | '@') class_def")); stmt_ty class_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_7_rule, p) + _PyPegen_lookahead(1, _tmp_7_rule, p) && (class_def_var = class_def_rule(p)) // class_def ) @@ -1980,7 +1980,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('with' | 'async') with_stmt")); stmt_ty with_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_8_rule, p) + _PyPegen_lookahead(1, _tmp_8_rule, p) && (with_stmt_var = with_stmt_rule(p)) // with_stmt ) @@ -2001,7 +2001,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('for' | 'async') for_stmt")); stmt_ty for_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_9_rule, p) + _PyPegen_lookahead(1, _tmp_9_rule, p) && (for_stmt_var = for_stmt_rule(p)) // for_stmt ) @@ -3234,7 +3234,7 @@ del_stmt_rule(Parser *p) && (a = del_targets_rule(p)) // del_targets && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_16_rule, p) + _PyPegen_lookahead(1, _tmp_16_rule, p) ) { D(fprintf(stderr, "%*c+ del_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'del' del_targets &(';' | NEWLINE)")); @@ -6877,7 +6877,7 @@ with_item_rule(Parser *p) && (t = star_target_rule(p)) // star_target && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' star_target &(',' | ')' | ':')")); @@ -8466,7 +8466,7 @@ literal_pattern_rule(Parser *p) if ( (value = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8700,7 +8700,7 @@ literal_expr_rule(Parser *p) if ( (signed_number_var = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8738,7 +8738,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -9302,7 +9302,7 @@ pattern_capture_target_rule(Parser *p) && (name = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ pattern_capture_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!\"_\" NAME !('.' | '(' | '=')")); @@ -9417,7 +9417,7 @@ value_pattern_rule(Parser *p) if ( (attr = attr_rule(p)) // attr && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ value_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "attr !('.' | '(' | '=')")); @@ -15070,7 +15070,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -19099,7 +19099,7 @@ target_with_star_atom_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19143,7 +19143,7 @@ target_with_star_atom_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19490,7 +19490,7 @@ single_subscript_attribute_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19534,7 +19534,7 @@ single_subscript_attribute_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19644,7 +19644,7 @@ t_primary_raw(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME &t_lookahead")); @@ -19688,7 +19688,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' &t_lookahead")); @@ -19726,7 +19726,7 @@ t_primary_raw(Parser *p) && (b = genexp_rule(p)) // genexp && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary genexp &t_lookahead")); @@ -19770,7 +19770,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 8)) // token=')' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '(' arguments? ')' &t_lookahead")); @@ -19805,7 +19805,7 @@ t_primary_raw(Parser *p) if ( (a = atom_rule(p)) // atom && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "atom &t_lookahead")); @@ -19995,7 +19995,7 @@ del_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -20039,7 +20039,7 @@ del_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -20527,7 +20527,7 @@ func_type_comment_rule(Parser *p) && (t = _PyPegen_expect_token(p, TYPE_COMMENT)) // token='TYPE_COMMENT' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_104_rule, p) + _PyPegen_lookahead(1, _tmp_104_rule, p) ) { D(fprintf(stderr, "%*c+ func_type_comment[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE TYPE_COMMENT &(NEWLINE INDENT)")); @@ -20721,7 +20721,7 @@ invalid_arguments_rule(Parser *p) && (b = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_110_rule, p) + _PyPegen_lookahead(1, _tmp_110_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "[(args ',')] NAME '=' &(',' | ')')")); @@ -20919,7 +20919,7 @@ invalid_kwarg_rule(Parser *p) expr_ty a; Token * b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_112_rule, p) + _PyPegen_lookahead(0, _tmp_112_rule, p) && (a = expression_rule(p)) // expression && @@ -21294,7 +21294,7 @@ invalid_expression_rule(Parser *p) expr_ty a; expr_ty b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_114_rule, p) + _PyPegen_lookahead(0, _tmp_114_rule, p) && (a = disjunction_rule(p)) // disjunction && @@ -21330,7 +21330,7 @@ invalid_expression_rule(Parser *p) && (b = disjunction_rule(p)) // disjunction && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_115_rule, p) + _PyPegen_lookahead(0, _tmp_115_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction !('else' | ':')")); @@ -21365,7 +21365,7 @@ invalid_expression_rule(Parser *p) && (_keyword_1 = _PyPegen_expect_token(p, 690)) // token='else' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) expression_rule, p) + _PyPegen_lookahead_for_expr(0, expression_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction 'else' !expression")); @@ -21555,7 +21555,7 @@ invalid_named_expression_rule(Parser *p) && (b = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_117_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME '=' bitwise_or !('=' | ':=')")); @@ -21581,7 +21581,7 @@ invalid_named_expression_rule(Parser *p) Token * b; expr_ty bitwise_or_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_118_rule, p) + _PyPegen_lookahead(0, _tmp_118_rule, p) && (a = bitwise_or_rule(p)) // bitwise_or && @@ -21589,7 +21589,7 @@ invalid_named_expression_rule(Parser *p) && (bitwise_or_var = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_117_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(list | tuple | genexp | 'True' | 'None' | 'False') bitwise_or '=' bitwise_or !('=' | ':=')")); @@ -22499,7 +22499,7 @@ invalid_default_rule(Parser *p) if ( (a = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_125_rule, p) + _PyPegen_lookahead(1, _tmp_125_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'=' &(')' | ',')")); @@ -23448,7 +23448,7 @@ invalid_with_item_rule(Parser *p) && (a = expression_rule(p)) // expression && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' expression &(',' | ')' | ':')")); @@ -23760,7 +23760,7 @@ invalid_dotted_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_138_rule, p) && (a = expression_rule(p)) // expression ) @@ -23811,7 +23811,7 @@ invalid_import_from_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 684)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_138_rule, p) && (a = expression_rule(p)) // expression ) @@ -24181,7 +24181,7 @@ invalid_try_stmt_rule(Parser *p) && (block_var = block_rule(p)) // block && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_143_rule, p) + _PyPegen_lookahead(0, _tmp_143_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_try_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'try' ':' block !('except' | 'finally')")); @@ -25973,7 +25973,7 @@ invalid_double_starred_kvpairs_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_148_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_double_starred_kvpairs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -26083,7 +26083,7 @@ invalid_kvpair_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_148_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_kvpair[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -26342,7 +26342,7 @@ invalid_fstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26371,7 +26371,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26403,7 +26403,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26469,7 +26469,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -26594,7 +26594,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -26620,7 +26620,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -26784,7 +26784,7 @@ invalid_tstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26813,7 +26813,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26845,7 +26845,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26911,7 +26911,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -27036,7 +27036,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -27062,7 +27062,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -37501,7 +37501,7 @@ _tmp_168_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_168[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); expr_ty expression_without_invalid_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _PyPegen_string_token, p) + _PyPegen_lookahead(0, _PyPegen_string_token, p) && (expression_without_invalid_var = expression_without_invalid_rule(p)) // expression_without_invalid ) diff --git a/Parser/pegen.c b/Parser/pegen.c index 81aad47010181c..0919ff417f1916 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -379,44 +379,34 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres) return 0; } -int -_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED to disable sanitizer checks on -// undefined behavior (UBsan) in this function, rather than changing 'func' -// callback API. -int _Py_NO_SANITIZE_UNDEFINED -_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} +#define LOOKAHEAD1(NAME, RES_TYPE) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *), Parser *p) \ + { \ + int mark = p->mark; \ + void *res = func(p); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD1(_PyPegen_lookahead, void *) +LOOKAHEAD1(_PyPegen_lookahead_for_expr, expr_ty) +LOOKAHEAD1(_PyPegen_lookahead_for_stmt, stmt_ty) +#undef LOOKAHEAD1 + +#define LOOKAHEAD2(NAME, RES_TYPE, T) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *, T), Parser *p, T arg) \ + { \ + int mark = p->mark; \ + void *res = func(p, arg); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD2(_PyPegen_lookahead_with_int, Token *, int) +LOOKAHEAD2(_PyPegen_lookahead_with_string, expr_ty, const char *) +#undef LOOKAHEAD2 Token * _PyPegen_expect_token(Parser *p, int type) diff --git a/Parser/pegen.h b/Parser/pegen.h index 1862fd7407ec3c..804f931871aec8 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -145,10 +145,11 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node); int _PyPegen_update_memo(Parser *p, int mark, int type, void *node); int _PyPegen_is_memoized(Parser *p, int type, void *pres); -int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); -int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); -int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); +int _PyPegen_lookahead_for_expr(int, expr_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_for_stmt(int, stmt_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); +int _PyPegen_lookahead_with_string(int, expr_ty (func)(Parser *, const char*), Parser *, const char*); Token *_PyPegen_expect_token(Parser *p, int type); void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected); diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 09c5651f24a3bb..04f66eec1a0154 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -214,33 +214,47 @@ def visit_NamedItem(self, node: NamedItem) -> FunctionCall: call.assigned_variable_type = node.type return call + def assert_no_undefined_behavior( + self, call: FunctionCall, wrapper: str, expected_rtype: str | None, + ) -> None: + if call.return_type != expected_rtype: + raise RuntimeError( + f"{call.function} return type is incompatible with {wrapper}: " + f"expect: {expected_rtype}, actual: {call.return_type}" + ) + def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: call = self.generate_call(node.node) - if call.nodetype == NodeTypes.NAME_TOKEN: - return FunctionCall( - function=f"_PyPegen_lookahead_with_name", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + comment = None + if call.nodetype is NodeTypes.NAME_TOKEN: + function = "_PyPegen_lookahead_for_expr" + self.assert_no_undefined_behavior(call, function, "expr_ty") + elif call.nodetype is NodeTypes.STRING_TOKEN: + # _PyPegen_string_token() returns 'void *' instead of 'Token *'; + # in addition, the overall function call would return 'expr_ty'. + assert call.function == "_PyPegen_string_token" + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype == NodeTypes.SOFT_KEYWORD: - return FunctionCall( - function=f"_PyPegen_lookahead_with_string", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead_with_string" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: - return FunctionCall( - function=f"_PyPegen_lookahead_with_int", - arguments=[positive, call.function, *call.arguments], - return_type="int", - comment=f"token={node.node}", - ) + function = "_PyPegen_lookahead_with_int" + self.assert_no_undefined_behavior(call, function, "Token *") + comment = f"token={node.node}" + elif call.return_type == "expr_ty": + function = "_PyPegen_lookahead_for_expr" + elif call.return_type == "stmt_ty": + function = "_PyPegen_lookahead_for_stmt" else: - return FunctionCall( - function=f"_PyPegen_lookahead", - arguments=[positive, f"(void *(*)(Parser *)) {call.function}", *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, None) + return FunctionCall( + function=function, + arguments=[positive, call.function, *call.arguments], + return_type="int", + comment=comment, + ) def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 1) From c23eec29602eb090d6f3e854e41c22c1e2c6dfc2 Mon Sep 17 00:00:00 2001 From: Alexander Shadchin Date: Tue, 10 Jun 2025 13:35:37 +0300 Subject: [PATCH 530/989] Docs: fix docstring of `email.message.Message.add_header` (#134355) --- Lib/email/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 87fcab68868d46..41fcc2b9778798 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -564,7 +564,7 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ From ee7345d507884aaaa000e51858c64852ac4f6d02 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 10 Jun 2025 13:24:33 +0200 Subject: [PATCH 531/989] gh-118928: Amend sqlite3 execute*() deprecation notes (#135163) --- Doc/library/sqlite3.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 2d0f9a740c685a..12761baf792a6b 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -1482,7 +1482,9 @@ Cursor objects :type parameters: :class:`dict` | :term:`sequence` :raises ProgrammingError: - If *sql* contains more than one SQL statement. + When *sql* contains more than one SQL statement. + When :ref:`named placeholders ` are used + and *parameters* is a sequence instead of a :class:`dict`. If :attr:`~Connection.autocommit` is :data:`LEGACY_TRANSACTION_CONTROL`, @@ -1491,13 +1493,11 @@ Cursor objects and there is no open transaction, a transaction is implicitly opened before executing *sql*. - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and *parameters* is a sequence instead of a :class:`dict`. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. Use :meth:`executescript` to execute multiple SQL statements. @@ -1519,8 +1519,10 @@ Cursor objects :type parameters: :term:`iterable` :raises ProgrammingError: - If *sql* contains more than one SQL statement, - or is not a DML statement. + When *sql* contains more than one SQL statement + or is not a DML statement, + When :ref:`named placeholders ` are used + and the items in *parameters* are sequences instead of :class:`dict`\s. Example: @@ -1544,14 +1546,12 @@ Cursor objects .. _RETURNING clauses: https://www.sqlite.org/lang_returning.html - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and the items in *parameters* are sequences instead of :class:`dict`\s. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. .. method:: executescript(sql_script, /) From 598aa7cc98bc1b39f10ec41decddd8dd88799fe1 Mon Sep 17 00:00:00 2001 From: Ajay Kamdar <140011370+ogbiggles@users.noreply.github.com> Date: Tue, 10 Jun 2025 07:28:31 -0400 Subject: [PATCH 532/989] gh-132969: Fix error/hang when shutdown(wait=False) and task exited abnormally (GH-133222) When shutdown is called with wait=False, the executor thread keeps running even after the ProcessPoolExecutor's state is reset. The executor then tries to replenish the worker processes pool resulting in an error and a potential hang when it comes across a worker that has died. Fixed the issue by having _adjust_process_count() return without doing anything if the ProcessPoolExecutor's state has been reset. Added unit tests to validate two scenarios: max_workers < num_tasks (exception) max_workers > num_tasks (exception + hang) --- Lib/concurrent/futures/process.py | 5 ++ .../test_concurrent_futures/test_shutdown.py | 58 +++++++++++++++++++ ...-04-30-19-32-18.gh-issue-132969.EagQ3G.rst | 7 +++ 3 files changed, 70 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-30-19-32-18.gh-issue-132969.EagQ3G.rst diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 76b7b2abe836d8..a14650bf5fa47c 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -755,6 +755,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. if self._idle_worker_semaphore.acquire(blocking=False): return diff --git a/Lib/test/test_concurrent_futures/test_shutdown.py b/Lib/test/test_concurrent_futures/test_shutdown.py index 7a4065afd46fc8..99b315b47e2530 100644 --- a/Lib/test/test_concurrent_futures/test_shutdown.py +++ b/Lib/test/test_concurrent_futures/test_shutdown.py @@ -330,6 +330,64 @@ def test_shutdown_no_wait(self): # shutdown. assert all([r == abs(v) for r, v in zip(res, range(-5, 5))]) + @classmethod + def _failing_task_gh_132969(cls, n): + raise ValueError("failing task") + + @classmethod + def _good_task_gh_132969(cls, n): + time.sleep(0.1 * n) + return n + + def _run_test_issue_gh_132969(self, max_workers): + # max_workers=2 will repro exception + # max_workers=4 will repro exception and then hang + + # Repro conditions + # max_tasks_per_child=1 + # a task ends abnormally + # shutdown(wait=False) is called + start_method = self.get_context().get_start_method() + if (start_method == "fork" or + (start_method == "forkserver" and sys.platform.startswith("win"))): + self.skipTest(f"Skipping test for {start_method = }") + executor = futures.ProcessPoolExecutor( + max_workers=max_workers, + max_tasks_per_child=1, + mp_context=self.get_context()) + f1 = executor.submit(ProcessPoolShutdownTest._good_task_gh_132969, 1) + f2 = executor.submit(ProcessPoolShutdownTest._failing_task_gh_132969, 2) + f3 = executor.submit(ProcessPoolShutdownTest._good_task_gh_132969, 3) + result = 0 + try: + result += f1.result() + result += f2.result() + result += f3.result() + except ValueError: + # stop processing results upon first exception + pass + + # Ensure that the executor cleans up after called + # shutdown with wait=False + executor_manager_thread = executor._executor_manager_thread + executor.shutdown(wait=False) + time.sleep(0.2) + executor_manager_thread.join() + return result + + def test_shutdown_gh_132969_case_1(self): + # gh-132969: test that exception "object of type 'NoneType' has no len()" + # is not raised when shutdown(wait=False) is called. + result = self._run_test_issue_gh_132969(2) + self.assertEqual(result, 1) + + def test_shutdown_gh_132969_case_2(self): + # gh-132969: test that process does not hang and + # exception "object of type 'NoneType' has no len()" is not raised + # when shutdown(wait=False) is called. + result = self._run_test_issue_gh_132969(4) + self.assertEqual(result, 1) + create_executor_tests(globals(), ProcessPoolShutdownTest, executor_mixins=(ProcessPoolForkMixin, diff --git a/Misc/NEWS.d/next/Library/2025-04-30-19-32-18.gh-issue-132969.EagQ3G.rst b/Misc/NEWS.d/next/Library/2025-04-30-19-32-18.gh-issue-132969.EagQ3G.rst new file mode 100644 index 00000000000000..7364c425941233 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-30-19-32-18.gh-issue-132969.EagQ3G.rst @@ -0,0 +1,7 @@ +Prevent the :class:`~concurrent.futures.ProcessPoolExecutor` executor thread, +which remains running when :meth:`shutdown(wait=False) +`, from +attempting to adjust the pool's worker processes after the object state has already been reset during shutdown. +A combination of conditions, including a worker process having terminated abormally, +resulted in an exception and a potential hang when the still-running executor thread +attempted to replace dead workers within the pool. From ff2b5f40c2bf5c71255caac8a743c09ba0758c02 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 10 Jun 2025 14:19:03 +0100 Subject: [PATCH 533/989] gh-130077: Properly match full soft keywords in the parser (#135317) --- Lib/test/test_syntax.py | 7 +++++++ .../2025-06-09-23-57-37.gh-issue-130077.MHknDB.rst | 2 ++ Parser/pegen.c | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-09-23-57-37.gh-issue-130077.MHknDB.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 13aaba405e3204..b09e524a756ca8 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -382,6 +382,13 @@ Traceback (most recent call last): SyntaxError: invalid syntax +# But prefixes of soft keywords should +# still raise specialized errors + +>>> (mat x) +Traceback (most recent call last): +SyntaxError: invalid syntax. Perhaps you forgot a comma? + From compiler_complex_args(): >>> def f(None=1): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-09-23-57-37.gh-issue-130077.MHknDB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-09-23-57-37.gh-issue-130077.MHknDB.rst new file mode 100644 index 00000000000000..a7d02426b6fc13 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-09-23-57-37.gh-issue-130077.MHknDB.rst @@ -0,0 +1,2 @@ +Properly raise custom syntax errors when incorrect syntax containing names +that are prefixes of soft keywords is encountered. Patch by Pablo Galindo. diff --git a/Parser/pegen.c b/Parser/pegen.c index 0919ff417f1916..50641de27d37fd 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -610,7 +610,8 @@ expr_ty _PyPegen_soft_keyword_token(Parser *p) { Py_ssize_t size; PyBytes_AsStringAndSize(t->bytes, &the_token, &size); for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { - if (strncmp(*keyword, the_token, (size_t)size) == 0) { + if (strlen(*keyword) == (size_t)size && + strncmp(*keyword, the_token, (size_t)size) == 0) { return _PyPegen_name_from_token(p, t); } } From 0f866cbfefd797b4dae25962457c5579bb90dde5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 10 Jun 2025 16:38:32 +0300 Subject: [PATCH 534/989] gh-133967: Do not normalize locale name 'C.UTF-8' to 'en_US.UTF-8' (#135347) --- Lib/locale.py | 5 ++++- Lib/test/test_locale.py | 4 ++++ .../Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst | 1 + Tools/i18n/makelocalealias.py | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst diff --git a/Lib/locale.py b/Lib/locale.py index 2feb10e59c96a3..dfedc6386cb891 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -883,6 +883,10 @@ def getpreferredencoding(do_setlocale=True): # updated 'sr@latn' -> 'sr_CS.UTF-8@latin' to 'sr_RS.UTF-8@latin' # removed 'univ' # removed 'universal' +# +# SS 2025-06-10: +# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist +# on all platforms. locale_alias = { 'a3': 'az_AZ.KOI8-C', @@ -962,7 +966,6 @@ def getpreferredencoding(do_setlocale=True): 'c.ascii': 'C', 'c.en': 'C', 'c.iso88591': 'en_US.ISO8859-1', - 'c.utf8': 'en_US.UTF-8', 'c_c': 'C', 'c_c.c': 'C', 'ca': 'ca_ES.ISO8859-1', diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 455d2af37efdc8..55b502e52ca454 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -387,6 +387,10 @@ def test_c(self): self.check('c', 'C') self.check('posix', 'C') + def test_c_utf8(self): + self.check('c.utf8', 'C.UTF-8') + self.check('C.UTF-8', 'C.UTF-8') + def test_english(self): self.check('en', 'en_US.ISO8859-1') self.check('EN', 'en_US.ISO8859-1') diff --git a/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst b/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst new file mode 100644 index 00000000000000..1976981727e235 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst @@ -0,0 +1 @@ +Do not normalize :mod:`locale` name 'C.UTF-8' to 'en_US.UTF-8'. diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index b407a8a643be7c..02af1caff7d499 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -140,6 +140,9 @@ def check(data): data = locale.locale_alias.copy() data.update(parse_glibc_supported(args.glibc_supported)) data.update(parse(args.locale_alias)) + # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist + # on all platforms. + data['c.utf8'] = 'C.UTF-8' while True: # Repeat optimization while the size is decreased. n = len(data) From 5ae669fc4e674968529cc32f7f31d14dddd76607 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 11 Jun 2025 04:51:08 +0300 Subject: [PATCH 535/989] gh-135326: Test support of __index__ in random.getrandbits() (#135356) --- Lib/test/test_random.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 54910cd8054a1f..31ebcb3b8b0b00 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -14,6 +14,15 @@ from fractions import Fraction from collections import abc, Counter + +class MyIndex: + def __init__(self, value): + self.value = value + + def __index__(self): + return self.value + + class TestBasicOps: # Superclass with tests common to all generators. # Subclasses must arrange for self.gen to retrieve the Random instance @@ -809,6 +818,9 @@ def test_getrandbits(self): self.gen.seed(1234567) self.assertEqual(self.gen.getrandbits(100), 97904845777343510404718956115) + self.gen.seed(1234567) + self.assertEqual(self.gen.getrandbits(MyIndex(100)), + 97904845777343510404718956115) def test_getrandbits_2G_bits(self): size = 2**31 From 2b8b4774d29a707330d463f226630185cbd3ceff Mon Sep 17 00:00:00 2001 From: Justin Applegate <70449145+Legoclones@users.noreply.github.com> Date: Wed, 11 Jun 2025 06:15:12 -0400 Subject: [PATCH 536/989] gh-135321: Always raise a correct exception for BINSTRING argument > 0x7fffffff in pickle (GH-135322) Co-authored-by: Serhiy Storchaka --- Lib/test/pickletester.py | 5 +++++ .../2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst | 1 + Modules/_pickle.c | 9 ++++----- 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 9d6ae3e4d00ece..9a3a26a8400844 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1100,6 +1100,11 @@ def test_large_32b_binunicode8(self): self.check_unpickling_error((pickle.UnpicklingError, OverflowError), dumped) + def test_large_binstring(self): + errmsg = 'BINSTRING pickle has negative byte count' + with self.assertRaisesRegex(pickle.UnpicklingError, errmsg): + self.loads(b'T\0\0\0\x80') + def test_get(self): pickled = b'((lp100000\ng100000\nt.' unpickled = self.loads(pickled) diff --git a/Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst b/Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst new file mode 100644 index 00000000000000..9e63d8e28b7696 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst @@ -0,0 +1 @@ +Raise a correct exception for values greater than 0x7fffffff for the ``BINSTRING`` opcode in the C implementation of :mod:`pickle`. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 86d8b38620cb7f..cf3ceb43fb3f3f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5543,17 +5543,16 @@ static int load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes) { PyObject *obj; - Py_ssize_t size; + long size; char *s; if (_Unpickler_Read(self, st, &s, nbytes) < 0) return -1; - size = calc_binsize(s, nbytes); + size = calc_binint(s, nbytes); if (size < 0) { - PyErr_Format(st->UnpicklingError, - "BINSTRING exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX); + PyErr_SetString(st->UnpicklingError, + "BINSTRING pickle has negative byte count"); return -1; } From 49d72365cd2d6c09a154a9a061efef4130e2c758 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 11 Jun 2025 12:44:58 +0200 Subject: [PATCH 537/989] gh-127545: Add _Py_ALIGNED_DEF(N, T) and use it for PyObject (GH-135209) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Replace _Py_ALIGN_AS(V) by _Py_ALIGNED_DEF(N, T) This is now a common façade for the various `_Alignas` alternatives, which behave in interesting ways -- see the source comment. The new macro (and MSVC's `__declspec(align)`) should not be used on a variable/member declaration that includes a struct declaraton. A workaround is to separate the struct definition. Do that for `PyASCIIObject.state`. * Specify minimum PyGC_Head and PyObject alignment As documented in InternalDocs/garbage_collector.md, the garbage collector stores flags in the least significant two bits of the _gc_prev pointer in struct PyGC_Head. Consequently, this pointer is only capable of storing a location that's aligned to a 4-byte boundary. Encode this requirement using _Py_ALIGNED_DEF. This patch fixes a segfault in m68k, which was previously investigated by Adrian Glaubitz here: https://lists.debian.org/debian-68k/2024/11/msg00020.html https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1087600 Original patch (using the GCC-only Py_ALIGNED) by Finn Thain. Co-authored-by: Finn Thain Co-authored-by: Victor Stinner Co-authored-by: John Paul Adrian Glaubitz --- Include/Python.h | 13 -- Include/cpython/unicodeobject.h | 120 +++++++++--------- Include/internal/pycore_interp_structs.h | 3 +- Include/object.h | 9 +- Include/pymacro.h | 82 +++++++----- ...-12-04-10-00-35.gh-issue-127545.t0THjE.rst | 1 + 6 files changed, 122 insertions(+), 106 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst diff --git a/Include/Python.h b/Include/Python.h index f34d581f0b4c91..64be80145890a3 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -59,14 +59,6 @@ # include // __readgsqword() #endif -// Suppress known warnings in Python header files. -#if defined(_MSC_VER) -// Warning that alignas behaviour has changed. Doesn't affect us, because we -// never relied on the old behaviour. -#pragma warning(push) -#pragma warning(disable: 5274) -#endif - // Include Python header files #include "pyport.h" #include "pymacro.h" @@ -146,9 +138,4 @@ #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" -// Restore warning filter -#ifdef _MSC_VER -#pragma warning(pop) -#endif - #endif /* !Py_PYTHON_H */ diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 7c1aac9696dec9..86c502730f478c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) { /* --- Unicode Type ------------------------------------------------------- */ +struct _PyUnicodeObject_state { + /* If interned is non-zero, the two references from the + dictionary to this object are *not* counted in ob_refcnt. + The possible values here are: + 0: Not Interned + 1: Interned + 2: Interned and Immortal + 3: Interned, Immortal, and Static + This categorization allows the runtime to determine the right + cleanup mechanism at runtime shutdown. */ +#ifdef Py_GIL_DISABLED + // Needs to be accessed atomically, so can't be a bit field. + unsigned char interned; +#else + unsigned int interned:2; +#endif + /* Character size: + + - PyUnicode_1BYTE_KIND (1): + + * character type = Py_UCS1 (8 bits, unsigned) + * all characters are in the range U+0000-U+00FF (latin1) + * if ascii is set, all characters are in the range U+0000-U+007F + (ASCII), otherwise at least one character is in the range + U+0080-U+00FF + + - PyUnicode_2BYTE_KIND (2): + + * character type = Py_UCS2 (16 bits, unsigned) + * all characters are in the range U+0000-U+FFFF (BMP) + * at least one character is in the range U+0100-U+FFFF + + - PyUnicode_4BYTE_KIND (4): + + * character type = Py_UCS4 (32 bits, unsigned) + * all characters are in the range U+0000-U+10FFFF + * at least one character is in the range U+10000-U+10FFFF + */ + unsigned int kind:3; + /* Compact is with respect to the allocation scheme. Compact unicode + objects only require one memory block while non-compact objects use + one block for the PyUnicodeObject struct and another for its data + buffer. */ + unsigned int compact:1; + /* The string only contains characters in the range U+0000-U+007F (ASCII) + and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is + set, use the PyASCIIObject structure. */ + unsigned int ascii:1; + /* The object is statically allocated. */ + unsigned int statically_allocated:1; +#ifndef Py_GIL_DISABLED + /* Historical: padding to ensure that PyUnicode_DATA() is always aligned to + 4 bytes (see issue gh-63736 on m68k) */ + unsigned int :24; +#endif +}; + /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject structure. state.ascii and state.compact are set, and the data immediately follow the structure. utf8_length can be found @@ -99,67 +156,8 @@ typedef struct { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ Py_hash_t hash; /* Hash value; -1 if not set */ -#ifdef Py_GIL_DISABLED - /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. - In the non-free-threaded build, we'll use explicit padding instead */ - _Py_ALIGN_AS(4) -#endif - struct { - /* If interned is non-zero, the two references from the - dictionary to this object are *not* counted in ob_refcnt. - The possible values here are: - 0: Not Interned - 1: Interned - 2: Interned and Immortal - 3: Interned, Immortal, and Static - This categorization allows the runtime to determine the right - cleanup mechanism at runtime shutdown. */ -#ifdef Py_GIL_DISABLED - // Needs to be accessed atomically, so can't be a bit field. - unsigned char interned; -#else - unsigned int interned:2; -#endif - /* Character size: - - - PyUnicode_1BYTE_KIND (1): - - * character type = Py_UCS1 (8 bits, unsigned) - * all characters are in the range U+0000-U+00FF (latin1) - * if ascii is set, all characters are in the range U+0000-U+007F - (ASCII), otherwise at least one character is in the range - U+0080-U+00FF - - - PyUnicode_2BYTE_KIND (2): - - * character type = Py_UCS2 (16 bits, unsigned) - * all characters are in the range U+0000-U+FFFF (BMP) - * at least one character is in the range U+0100-U+FFFF - - - PyUnicode_4BYTE_KIND (4): - - * character type = Py_UCS4 (32 bits, unsigned) - * all characters are in the range U+0000-U+10FFFF - * at least one character is in the range U+10000-U+10FFFF - */ - unsigned int kind:3; - /* Compact is with respect to the allocation scheme. Compact unicode - objects only require one memory block while non-compact objects use - one block for the PyUnicodeObject struct and another for its data - buffer. */ - unsigned int compact:1; - /* The string only contains characters in the range U+0000-U+007F (ASCII) - and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is - set, use the PyASCIIObject structure. */ - unsigned int ascii:1; - /* The object is statically allocated. */ - unsigned int statically_allocated:1; -#ifndef Py_GIL_DISABLED - /* Padding to ensure that PyUnicode_DATA() is always aligned to - 4 bytes (see issue gh-63736 on m68k) */ - unsigned int :24; -#endif - } state; + /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */ + _Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state; } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f25f5847b3b307..f1f427d99dea69 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -159,10 +159,11 @@ struct atexit_state { typedef struct { // Tagged pointer to next object in the list. // 0 means the object is not tracked - uintptr_t _gc_next; + _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next; // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. + // Those bits are made available by the struct's minimum alignment. uintptr_t _gc_prev; } PyGC_Head; diff --git a/Include/object.h b/Include/object.h index 42aed614d4ad8e..c75e9db0cbd935 100644 --- a/Include/object.h +++ b/Include/object.h @@ -101,6 +101,12 @@ whose size is determined when the object is allocated. #define PyObject_VAR_HEAD PyVarObject ob_base; #define Py_INVALID_SIZE (Py_ssize_t)-1 +/* PyObjects are given a minimum alignment so that the least significant bits + * of an object pointer become available for other purposes. + * This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes. + */ +#define _PyObject_MIN_ALIGNMENT 4 + /* Nothing is actually declared to be a PyObject, but every pointer to * a Python object can be cast to a PyObject*. This is inheritance built * by hand. Similarly every pointer to a variable-size Python object can, @@ -136,6 +142,7 @@ struct _object { #else Py_ssize_t ob_refcnt; #endif + _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner; }; #ifdef _MSC_VER __pragma(warning(pop)) @@ -153,7 +160,7 @@ struct _object { // ob_tid stores the thread id (or zero). It is also used by the GC and the // trashcan mechanism as a linked list pointer and by the GC to store the // computed "gc_refs" refcount. - uintptr_t ob_tid; + _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid; uint16_t ob_flags; PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state diff --git a/Include/pymacro.h b/Include/pymacro.h index d410645034d848..bfe660e8303d0f 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -24,44 +24,66 @@ #endif -// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword, -// We currently use alignas for free-threaded builds only; additional compat -// checking would be great before we add it to the default build. -// Standards/compiler support: +// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment +// +// `N`: the desired minimum alignment, an integer literal, number of bytes +// `T`: the type of the defined variable +// (or a type with at least the defined variable's alignment) +// +// May not be used on a struct definition. +// +// Standards/compiler support for `alignas` alternatives: // - `alignas` is a keyword in C23 and C++11. // - `_Alignas` is a keyword in C11 // - GCC & clang has __attribute__((aligned)) // (use that for older standards in pedantic mode) // - MSVC has __declspec(align) // - `_Alignas` is common C compiler extension -// Older compilers may name it differently; to allow compilation on such -// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already +// Older compilers may name `alignas` differently; to allow compilation on such +// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already // defined. Note that defining it wrong (including defining it to nothing) will // cause ABI incompatibilities. -#ifdef Py_GIL_DISABLED -# ifndef _Py_ALIGN_AS -# ifdef __cplusplus -# if __cplusplus >= 201103L -# define _Py_ALIGN_AS(V) alignas(V) -# elif defined(__GNUC__) || defined(__clang__) -# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) -# elif defined(_MSC_VER) -# define _Py_ALIGN_AS(V) __declspec(align(V)) -# else -# define _Py_ALIGN_AS(V) alignas(V) -# endif -# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L -# define _Py_ALIGN_AS(V) alignas(V) -# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define _Py_ALIGN_AS(V) _Alignas(V) -# elif (defined(__GNUC__) || defined(__clang__)) -# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) -# elif defined(_MSC_VER) -# define _Py_ALIGN_AS(V) __declspec(align(V)) -# else -# define _Py_ALIGN_AS(V) _Alignas(V) -# endif -# endif +// +// Behavior of `alignas` alternatives: +// - `alignas` & `_Alignas`: +// - Can be used multiple times; the greatest alignment applies. +// - It is an *error* if the combined effect of all `alignas` modifiers would +// decrease the alignment. +// - Takes types or numbers. +// - May not be used on a struct definition, unless also defining a variable. +// - `__declspec(align)`: +// - Has no effect if it would decrease alignment. +// - Only takes an integer literal. +// - May be used on struct or variable definitions. +// However, when defining both the struct and the variable at once, +// `declspec(aligned)` causes compiler warning 5274 and possible ABI +// incompatibility. +// - ` __attribute__((aligned))`: +// - Has no effect if it would decrease alignment. +// - Takes types or numbers +// - May be used on struct or variable definitions. +#ifndef _Py_ALIGNED_DEF +# ifdef __cplusplus +# if __cplusplus >= 201103L +# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T +# elif defined(__GNUC__) || defined(__clang__) +# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T +# elif defined(_MSC_VER) +# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T +# else +# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T +# endif +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T +# elif (defined(__GNUC__) || defined(__clang__)) +# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T +# elif defined(_MSC_VER) +# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T +# else +# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T +# endif #endif /* Minimum value between x and y */ diff --git a/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst b/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst new file mode 100644 index 00000000000000..3667e2778b7b93 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst @@ -0,0 +1 @@ +Fix crash when building on Linux/m68k. From c87b5b2cb665b6bc413bc3a13cc2d70da14dfd66 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 11 Jun 2025 15:52:25 +0100 Subject: [PATCH 538/989] GH-135379: Remove types from stack items in code generator. (GH-135384) * Make casts explicit in the instruction definitions --- Include/internal/pycore_stackref.h | 26 ++++ Lib/test/test_generated_cases.py | 38 +---- ...-06-11-12-14-06.gh-issue-135379.25ttXq.rst | 4 + Python/bytecodes.c | 63 ++++---- Python/executor_cases.c.h | 134 ++++++++++-------- Python/generated_cases.c.h | 122 ++++++++-------- Python/optimizer_bytecodes.c | 29 ++-- Python/optimizer_cases.c.h | 46 +++--- Tools/cases_generator/analyzer.py | 10 +- Tools/cases_generator/generators_common.py | 4 +- Tools/cases_generator/optimizer_generator.py | 4 +- Tools/cases_generator/parsing.py | 12 +- Tools/cases_generator/stack.py | 20 +-- 13 files changed, 257 insertions(+), 255 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-06-11-12-14-06.gh-issue-135379.25ttXq.rst diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 8791476725289c..10e7199269eb40 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -264,6 +264,32 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); static const _PyStackRef PyStackRef_ERROR = { .bits = Py_TAG_INVALID }; +/* Wrap a pointer in a stack ref. + * The resulting stack reference is not safe and should only be used + * in the interpreter to pass values from one uop to another. + * The GC should never see one of these stack refs. */ +static inline _PyStackRef +PyStackRef_Wrap(void *ptr) +{ + assert(ptr != NULL); +#ifdef Py_DEBUG + return (_PyStackRef){ .bits = ((uintptr_t)ptr) | Py_TAG_INVALID }; +#else + return (_PyStackRef){ .bits = (uintptr_t)ptr }; +#endif +} + +static inline void * +PyStackRef_Unwrap(_PyStackRef ref) +{ +#ifdef Py_DEBUG + assert ((ref.bits & Py_TAG_BITS) == Py_TAG_INVALID); + return (void *)(ref.bits & ~Py_TAG_BITS); +#else + return (void *)(ref.bits); +#endif +} + static inline bool PyStackRef_IsError(_PyStackRef ref) { diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 37046d8e1c02b7..6411e4318b6c3a 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -56,14 +56,14 @@ class TestEffects(unittest.TestCase): def test_effect_sizes(self): stack = Stack() inputs = [ - x := StackItem("x", None, "1"), - y := StackItem("y", None, "oparg"), - z := StackItem("z", None, "oparg*2"), + x := StackItem("x", "1"), + y := StackItem("y", "oparg"), + z := StackItem("z", "oparg*2"), ] outputs = [ - StackItem("x", None, "1"), - StackItem("b", None, "oparg*4"), - StackItem("c", None, "1"), + StackItem("x", "1"), + StackItem("b", "oparg*4"), + StackItem("c", "1"), ] null = CWriter.null() stack.pop(z, null) @@ -1103,32 +1103,6 @@ def test_array_of_one(self): """ self.run_cases_test(input, output) - def test_pointer_to_stackref(self): - input = """ - inst(OP, (arg: _PyStackRef * -- out)) { - out = *arg; - DEAD(arg); - } - """ - output = """ - TARGET(OP) { - #if Py_TAIL_CALL_INTERP - int opcode = OP; - (void)(opcode); - #endif - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(OP); - _PyStackRef *arg; - _PyStackRef out; - arg = (_PyStackRef *)stack_pointer[-1].bits; - out = *arg; - stack_pointer[-1] = out; - DISPATCH(); - } - """ - self.run_cases_test(input, output) - def test_unused_cached_value(self): input = """ op(FIRST, (arg1 -- out)) { diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-06-11-12-14-06.gh-issue-135379.25ttXq.rst b/Misc/NEWS.d/next/Tools-Demos/2025-06-11-12-14-06.gh-issue-135379.25ttXq.rst new file mode 100644 index 00000000000000..25599a865b7246 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-06-11-12-14-06.gh-issue-135379.25ttXq.rst @@ -0,0 +1,4 @@ +The cases generator no longer accepts type annotations on stack items. +Conversions to non-default types are now done explictly in bytecodes.c and +optimizer_bytecodes.c. This will simplify code generation for top-of-stack +caching and other future features. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c4b13da5db41d8..032e76f72af42c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -985,12 +985,13 @@ dummy_func( STAT_INC(BINARY_OP, hit); } - op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) { - new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) { + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; INPUTS_DEAD(); frame->return_offset = INSTRUCTION_SIZE; + new_frame = PyStackRef_Wrap(pushed_frame); } macro(BINARY_OP_SUBSCR_GETITEM) = @@ -1296,20 +1297,21 @@ dummy_func( macro(SEND) = _SPECIALIZE_SEND + _SEND; - op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame: _PyInterpreterFrame *)) { + op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(receiver); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && Py_TYPE(gen) != &PyCoro_Type); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert(INSTRUCTION_SIZE + oparg <= UINT16_MAX); frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg); - gen_frame->previous = frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); } macro(SEND_GEN) = @@ -2463,7 +2465,7 @@ dummy_func( _LOAD_ATTR_CLASS + _PUSH_NULL_CONDITIONAL; - op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame: _PyInterpreterFrame *)) { + op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { assert((oparg & 1) == 0); assert(Py_IS_TYPE(fget, &PyFunction_Type)); PyFunctionObject *f = (PyFunctionObject *)fget; @@ -2473,9 +2475,10 @@ dummy_func( DEOPT_IF(code->co_argcount != 1); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; DEAD(owner); + new_frame = PyStackRef_Wrap(pushed_frame); } macro(LOAD_ATTR_PROPERTY) = @@ -3344,7 +3347,7 @@ dummy_func( _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; - op(_FOR_ITER_GEN_FRAME, (iter, null -- iter, null, gen_frame: _PyInterpreterFrame*)) { + op(_FOR_ITER_GEN_FRAME, (iter, null -- iter, null, gen_frame)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); #ifdef Py_GIL_DISABLED @@ -3356,14 +3359,15 @@ dummy_func( #endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; // oparg is the return offset from the next instruction. frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg); + gen_frame = PyStackRef_Wrap(pushed_frame); } macro(FOR_ITER_GEN) = @@ -3715,7 +3719,7 @@ dummy_func( macro(CALL) = _SPECIALIZE_CALL + unused/2 + _MAYBE_EXPAND_METHOD + _DO_CALL + _CHECK_PERIODIC; macro(INSTRUMENTED_CALL) = unused/3 + _MAYBE_EXPAND_METHOD + _MONITOR_CALL + _DO_CALL + _CHECK_PERIODIC; - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); // oparg counts all of the args, but *not* self: @@ -3737,7 +3741,7 @@ dummy_func( if (temp == NULL) { ERROR_NO_POP(); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { @@ -3874,27 +3878,26 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame)) { int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } INPUTS_DEAD(); + new_frame = PyStackRef_Wrap(pushed_frame); } - op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) { - // Write it out explicitly because it's subtly different. - // Eventually this should be the only occurrence of this code. + op(_PUSH_FRAME, (new_frame -- )) { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); DEAD(new_frame); SYNC_SP(); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4046,7 +4049,7 @@ dummy_func( PyStackRef_CLOSE(temp); } - op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame: _PyInterpreterFrame *)) { + op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) { _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); @@ -4063,12 +4066,12 @@ dummy_func( _PyEval_FrameClearAndPop(tstate, shim); ERROR_NO_POP(); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + init_frame = PyStackRef_Wrap(temp); } macro(CALL_ALLOC_AND_ENTER_INIT) = @@ -4594,7 +4597,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _PyInterpreterFrame*)) { + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); // oparg counts all of the args, but *not* self: @@ -4621,7 +4624,7 @@ dummy_func( DEAD(callable); SYNC_SP(); ERROR_IF(temp == NULL); - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable, unused, unused[oparg], unused -- callable, unused, unused[oparg], unused)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d19605169d5e55..4f772f916d1152 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1551,15 +1551,16 @@ _PyStackRef getitem; _PyStackRef sub; _PyStackRef container; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; getitem = stack_pointer[-1]; sub = stack_pointer[-2]; container = stack_pointer[-3]; - new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; frame->return_offset = 6 ; - stack_pointer[-3].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-3] = new_frame; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -1907,7 +1908,7 @@ case _SEND_GEN_FRAME: { _PyStackRef v; _PyStackRef receiver; - _PyInterpreterFrame *gen_frame; + _PyStackRef gen_frame; oparg = CURRENT_OPARG(); v = stack_pointer[-1]; receiver = stack_pointer[-2]; @@ -1921,15 +1922,16 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert( 2 + oparg <= UINT16_MAX); frame->return_offset = (uint16_t)( 2 + oparg); - gen_frame->previous = frame; - stack_pointer[-1].bits = (uintptr_t)gen_frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-1] = gen_frame; break; } @@ -3471,7 +3473,7 @@ case _LOAD_ATTR_PROPERTY_FRAME: { _PyStackRef owner; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); owner = stack_pointer[-1]; PyObject *fget = (PyObject *)CURRENT_OPERAND0(); @@ -3496,9 +3498,10 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; - stack_pointer[-1].bits = (uintptr_t)new_frame; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-1] = new_frame; break; } @@ -4467,7 +4470,7 @@ case _FOR_ITER_GEN_FRAME: { _PyStackRef iter; - _PyInterpreterFrame *gen_frame; + _PyStackRef gen_frame; oparg = CURRENT_OPARG(); iter = stack_pointer[-2]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); @@ -4487,14 +4490,15 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; frame->return_offset = (uint16_t)( 2 + oparg); - stack_pointer[0].bits = (uintptr_t)gen_frame; + gen_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[0] = gen_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -4775,7 +4779,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; @@ -4800,8 +4804,8 @@ if (temp == NULL) { JUMP_TO_ERROR(); } - new_frame = temp; - stack_pointer[0].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = new_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -5067,7 +5071,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 0; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5075,13 +5079,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5091,7 +5096,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 1; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5099,13 +5104,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5115,7 +5121,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 2; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5123,13 +5129,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5139,7 +5146,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 3; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5147,13 +5154,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5163,7 +5171,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 4; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5171,13 +5179,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5187,34 +5196,35 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _PyInterpreterFrame *new_frame; - new_frame = (_PyInterpreterFrame *)stack_pointer[-1].bits; + _PyStackRef new_frame; + new_frame = stack_pointer[-1]; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -5429,7 +5439,7 @@ _PyStackRef *args; _PyStackRef self; _PyStackRef init; - _PyInterpreterFrame *init_frame; + _PyStackRef init_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self = stack_pointer[-1 - oparg]; @@ -5453,10 +5463,10 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; tstate->py_recursion_remaining--; - stack_pointer[0].bits = (uintptr_t)init_frame; + init_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = init_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -6309,7 +6319,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); kwnames = stack_pointer[-1]; args = &stack_pointer[-1 - oparg]; @@ -6343,8 +6353,8 @@ if (temp == NULL) { JUMP_TO_ERROR(); } - new_frame = temp; - stack_pointer[0].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = new_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c8825df3ade5a5..5ac519bb1b6093 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -604,7 +604,7 @@ _PyStackRef container; _PyStackRef getitem; _PyStackRef sub; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 5 cache entries */ // _CHECK_PEP_523 { @@ -650,19 +650,20 @@ // _BINARY_OP_SUBSCR_INIT_CALL { sub = stack_pointer[-1]; - new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; frame->return_offset = 6 ; + new_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -1708,8 +1709,8 @@ _PyStackRef init; _PyStackRef self; _PyStackRef *args; - _PyInterpreterFrame *init_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef init_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -1792,17 +1793,17 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; tstate->py_recursion_remaining--; + init_frame = PyStackRef_Wrap(temp); } // _PUSH_FRAME { new_frame = init_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -1828,7 +1829,7 @@ _PyStackRef null; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -1921,12 +1922,13 @@ args = &stack_pointer[-oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -1940,11 +1942,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -1970,7 +1972,7 @@ _PyStackRef null; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -2056,7 +2058,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -2070,9 +2072,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -3040,7 +3042,7 @@ _PyStackRef self_or_null; _PyStackRef *args; _PyStackRef kwnames; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -3127,7 +3129,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -3141,9 +3143,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -3304,7 +3306,7 @@ _PyStackRef self_or_null; _PyStackRef *args; _PyStackRef kwnames; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -3364,7 +3366,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -3378,9 +3380,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4163,7 +4165,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -4227,12 +4229,13 @@ args = &stack_pointer[-oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -4246,11 +4249,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4275,7 +4278,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -4334,7 +4337,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -4348,9 +4351,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -5785,8 +5788,8 @@ INSTRUCTION_STATS(FOR_ITER_GEN); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; - _PyInterpreterFrame *gen_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef gen_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -5818,21 +5821,22 @@ JUMP_TO_PREDICTED(FOR_ITER); } STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; frame->return_offset = (uint16_t)( 2 + oparg); + gen_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { new_frame = gen_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -8650,7 +8654,7 @@ INSTRUCTION_STATS(LOAD_ATTR_PROPERTY); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -8701,8 +8705,9 @@ JUMP_TO_PREDICTED(LOAD_ATTR); } STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -8716,11 +8721,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -10661,8 +10666,8 @@ static_assert(INLINE_CACHE_ENTRIES_SEND == 1, "incorrect cache size"); _PyStackRef receiver; _PyStackRef v; - _PyInterpreterFrame *gen_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef gen_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -10688,24 +10693,25 @@ JUMP_TO_PREDICTED(SEND); } STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert( 2 + oparg <= UINT16_MAX); frame->return_offset = (uint16_t)( 2 + oparg); - gen_frame->previous = frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { new_frame = gen_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index a9d5e92ca02a59..babd3e46b8d1da 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -373,7 +373,7 @@ dummy_func(void) { GETLOCAL(this_instr->operand0) = res; } - op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) { new_frame = NULL; ctx->done = true; } @@ -697,7 +697,7 @@ dummy_func(void) { self = owner; } - op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { (void)fget; new_frame = NULL; ctx->done = true; @@ -735,7 +735,7 @@ dummy_func(void) { sym_set_type(callable, &PyMethod_Type); } - op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame)) { int argcount = oparg; PyCodeObject *co = NULL; @@ -756,10 +756,9 @@ dummy_func(void) { } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); - + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); } } @@ -769,7 +768,7 @@ dummy_func(void) { self_or_null = sym_new_not_null(ctx); } - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) { PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); co = get_code_with_logging((this_instr + 2)); @@ -778,10 +777,10 @@ dummy_func(void) { break; } - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); } - op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { new_frame = NULL; ctx->done = true; } @@ -793,7 +792,7 @@ dummy_func(void) { self_or_null = sym_new_not_null(ctx); } - op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame: _Py_UOpsAbstractFrame *)) { + op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) { init_frame = NULL; ctx->done = true; } @@ -860,13 +859,13 @@ dummy_func(void) { } } - op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame: _Py_UOpsAbstractFrame*)) { + op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame)) { gen_frame = NULL; /* We are about to hit the end of the trace */ ctx->done = true; } - op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame: _Py_UOpsAbstractFrame *)) { + op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame)) { gen_frame = NULL; // We are about to hit the end of the trace: ctx->done = true; @@ -884,12 +883,12 @@ dummy_func(void) { Py_UNREACHABLE(); } - op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- )) { + op(_PUSH_FRAME, (new_frame -- )) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)new_frame; ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); if (co == NULL) { // should be about to _EXIT_TRACE anyway diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4780e492f61d74..adab110c5ced66 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -715,10 +715,10 @@ } case _BINARY_OP_SUBSCR_INIT_CALL: { - _Py_UOpsAbstractFrame *new_frame; + JitOptSymbol *new_frame; new_frame = NULL; ctx->done = true; - stack_pointer[-3] = (JitOptSymbol *)new_frame; + stack_pointer[-3] = new_frame; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -829,10 +829,10 @@ /* _SEND is not a viable micro-op for tier 2 */ case _SEND_GEN_FRAME: { - _Py_UOpsAbstractFrame *gen_frame; + JitOptSymbol *gen_frame; gen_frame = NULL; ctx->done = true; - stack_pointer[-1] = (JitOptSymbol *)gen_frame; + stack_pointer[-1] = gen_frame; break; } @@ -1323,12 +1323,12 @@ } case _LOAD_ATTR_PROPERTY_FRAME: { - _Py_UOpsAbstractFrame *new_frame; + JitOptSymbol *new_frame; PyObject *fget = (PyObject *)this_instr->operand0; (void)fget; new_frame = NULL; ctx->done = true; - stack_pointer[-1] = (JitOptSymbol *)new_frame; + stack_pointer[-1] = new_frame; break; } @@ -1685,10 +1685,10 @@ } case _FOR_ITER_GEN_FRAME: { - _Py_UOpsAbstractFrame *gen_frame; + JitOptSymbol *gen_frame; gen_frame = NULL; ctx->done = true; - stack_pointer[0] = (JitOptSymbol *)gen_frame; + stack_pointer[0] = gen_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -1857,7 +1857,7 @@ /* _MONITOR_CALL is not a viable micro-op for tier 2 */ case _PY_FRAME_GENERAL: { - _Py_UOpsAbstractFrame *new_frame; + JitOptSymbol *new_frame; PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); co = get_code_with_logging((this_instr + 2)); @@ -1865,8 +1865,8 @@ ctx->done = true; break; } - new_frame = frame_new(ctx, co, 0, NULL, 0); - stack_pointer[-2 - oparg] = (JitOptSymbol *)new_frame; + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -1970,7 +1970,7 @@ case _INIT_CALL_PY_EXACT_ARGS: { JitOptSymbol **args; JitOptSymbol *self_or_null; - _Py_UOpsAbstractFrame *new_frame; + JitOptSymbol *new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; int argcount = oparg; @@ -1988,25 +1988,25 @@ argcount++; } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); } - stack_pointer[-2 - oparg] = (JitOptSymbol *)new_frame; + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + JitOptSymbol *new_frame; + new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)new_frame; ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); if (co == NULL) { ctx->done = true; @@ -2159,10 +2159,10 @@ } case _CREATE_INIT_FRAME: { - _Py_UOpsAbstractFrame *init_frame; + JitOptSymbol *init_frame; init_frame = NULL; ctx->done = true; - stack_pointer[-2 - oparg] = (JitOptSymbol *)init_frame; + stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -2326,10 +2326,10 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ case _PY_FRAME_KW: { - _Py_UOpsAbstractFrame *new_frame; + JitOptSymbol *new_frame; new_frame = NULL; ctx->done = true; - stack_pointer[-3 - oparg] = (JitOptSymbol *)new_frame; + stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); break; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 1447f365336d82..fca9b29f9ebc2e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -135,15 +135,13 @@ def size(self) -> int: @dataclass class StackItem: name: str - type: str | None size: str peek: bool = False used: bool = False def __str__(self) -> str: size = f"[{self.size}]" if self.size else "" - type = "" if self.type is None else f"{self.type} " - return f"{type}{self.name}{size} {self.peek}" + return f"{self.name}{size} {self.peek}" def is_array(self) -> bool: return self.size != "" @@ -345,7 +343,7 @@ def override_error( def convert_stack_item( item: parser.StackEffect, replace_op_arg_1: str | None ) -> StackItem: - return StackItem(item.name, item.type, item.size) + return StackItem(item.name, item.size) def check_unused(stack: list[StackItem], input_names: dict[str, lexer.Token]) -> None: "Unused items cannot be on the stack above used, non-peek items" @@ -683,6 +681,8 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsNullOrInt", "PyStackRef_IsError", "PyStackRef_IsValid", + "PyStackRef_Wrap", + "PyStackRef_Unwrap", ) @@ -811,7 +811,7 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: if len(stack_inputs) == 0: return False return all( - (s.name == other.name and s.type == other.type and s.size == other.size) + (s.name == other.name and s.size == other.size) for s, other in zip(stack_inputs, instr.outputs) ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 02f9a952754e66..47de205c0e9120 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -56,9 +56,7 @@ def root_relative_path(filename: str) -> str: def type_and_null(var: StackItem) -> tuple[str, str]: - if var.type: - return var.type, "NULL" - elif var.is_array(): + if var.is_array(): return "_PyStackRef *", "NULL" else: return "_PyStackRef", "PyStackRef_NULL" diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index fda022a44e59cc..75805dbd7f37f4 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -73,8 +73,6 @@ def validate_uop(override: Uop, uop: Uop) -> None: def type_name(var: StackItem) -> str: if var.is_array(): return "JitOptSymbol **" - if var.type: - return var.type return "JitOptSymbol *" @@ -230,7 +228,7 @@ def generate_abstract_interpreter( declare_variables(override, out, skip_inputs=False) else: declare_variables(uop, out, skip_inputs=True) - stack = Stack(extract_bits=False, cast_type="JitOptSymbol *") + stack = Stack() write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) out.start_line() out.emit("break;\n") diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 9c9b0053a5928b..a6dac48187525d 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -247,12 +247,11 @@ def accept(self, visitor: Visitor) -> None: @dataclass class StackEffect(Node): name: str = field(compare=False) # __eq__ only uses type, cond, size - type: str = "" # Optional `:type` size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond def __repr__(self) -> str: - items = [self.name, self.type, self.size] + items = [self.name, self.size] while items and items[-1] == "": del items[-1] return f"StackEffect({', '.join(repr(item) for item in items)})" @@ -463,20 +462,13 @@ def stack_effect(self) -> StackEffect | None: # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): - type_text = "" - if self.expect(lx.COLON): - type_text = self.require(lx.IDENTIFIER).text.strip() - if self.expect(lx.TIMES): - type_text += " *" size_text = "" if self.expect(lx.LBRACKET): - if type_text: - raise self.make_syntax_error("Unexpected [") if not (size := self.expression()): raise self.make_syntax_error("Expected expression") self.require(lx.RBRACKET) size_text = size.text.strip() - return StackEffect(tkn.text, type_text, size_text) + return StackEffect(tkn.text, size_text) return None @contextual diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index df168afa88888c..3a0e7e5d0d5636 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,7 +168,7 @@ def from_memory(defn: StackItem, offset: PointerOffset) -> "Local": @staticmethod def register(name: str) -> "Local": - item = StackItem(name, None, "", False, True) + item = StackItem(name, "", False, True) return Local(item, None, True) def kill(self) -> None: @@ -216,13 +216,11 @@ def array_or_scalar(var: StackItem | Local) -> str: return "array" if var.is_array() else "scalar" class Stack: - def __init__(self, extract_bits: bool=True, cast_type: str = "uintptr_t") -> None: + def __init__(self) -> None: self.base_offset = PointerOffset.zero() self.physical_sp = PointerOffset.zero() self.logical_sp = PointerOffset.zero() self.variables: list[Local] = [] - self.extract_bits = extract_bits - self.cast_type = cast_type def drop(self, var: StackItem, check_liveness: bool) -> None: self.logical_sp = self.logical_sp.pop(var) @@ -268,10 +266,8 @@ def pop(self, var: StackItem, out: CWriter) -> Local: self.base_offset = self.logical_sp if var.name in UNUSED or not var.used: return Local.unused(var, self.base_offset) - cast = f"({var.type})" if (not indirect and var.type) else "" - bits = ".bits" if cast and self.extract_bits else "" c_offset = (self.base_offset - self.physical_sp).to_c() - assign = f"{var.name} = {cast}{indirect}stack_pointer[{c_offset}]{bits};\n" + assign = f"{var.name} = {indirect}stack_pointer[{c_offset}];\n" out.emit(assign) self._print(out) return Local.from_memory(var, self.base_offset) @@ -292,12 +288,8 @@ def _do_emit( out: CWriter, var: StackItem, stack_offset: PointerOffset, - cast_type: str, - extract_bits: bool, ) -> None: - cast = f"({cast_type})" if var.type else "" - bits = ".bits" if cast and extract_bits else "" - out.emit(f"stack_pointer[{stack_offset.to_c()}]{bits} = {cast}{var.name};\n") + out.emit(f"stack_pointer[{stack_offset.to_c()}] = {var.name};\n") def _save_physical_sp(self, out: CWriter) -> None: if self.physical_sp != self.logical_sp: @@ -320,7 +312,7 @@ def save_variables(self, out: CWriter) -> None: self._print(out) var.memory_offset = var_offset stack_offset = var_offset - self.physical_sp - Stack._do_emit(out, var.item, stack_offset, self.cast_type, self.extract_bits) + Stack._do_emit(out, var.item, stack_offset) self._print(out) var_offset = var_offset.push(var.item) @@ -350,7 +342,7 @@ def _print(self, out: CWriter) -> None: out.emit(self.as_comment() + "\n") def copy(self) -> "Stack": - other = Stack(self.extract_bits, self.cast_type) + other = Stack() other.base_offset = self.base_offset other.physical_sp = self.physical_sp other.logical_sp = self.logical_sp From b706ff003c536c5bca24dfdd3a8917bffcfa3df1 Mon Sep 17 00:00:00 2001 From: Malcolm Smith Date: Wed, 11 Jun 2025 16:23:47 +0100 Subject: [PATCH 539/989] gh-133264: Correct documentation of how Py_Main and Py_RunMain handle SystemExit (#135337) --- Doc/c-api/init.rst | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 9c866438b487d0..3106bf9808f254 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -492,17 +492,8 @@ Initializing and finalizing the interpreter strings other than those passed in (however, the contents of the strings pointed to by the argument list are not modified). - The return value will be ``0`` if the interpreter exits normally (i.e., - without an exception), ``1`` if the interpreter exits due to an exception, - or ``2`` if the argument list does not represent a valid Python command - line. - - Note that if an otherwise unhandled :exc:`SystemExit` is raised, this - function will not return ``1``, but exit the process, as long as - ``Py_InspectFlag`` is not set. If ``Py_InspectFlag`` is set, execution will - drop into the interactive Python prompt, at which point a second otherwise - unhandled :exc:`SystemExit` will still exit the process, while any other - means of exiting will set the return value as described above. + The return value is ``2`` if the argument list does not represent a valid + Python command line, and otherwise the same as :c:func:`Py_RunMain`. In terms of the CPython runtime configuration APIs documented in the :ref:`runtime configuration ` section (and without accounting @@ -539,23 +530,18 @@ Initializing and finalizing the interpreter If :c:member:`PyConfig.inspect` is not set (the default), the return value will be ``0`` if the interpreter exits normally (that is, without raising - an exception), or ``1`` if the interpreter exits due to an exception. If an - otherwise unhandled :exc:`SystemExit` is raised, the function will immediately - exit the process instead of returning ``1``. + an exception), the exit status of an unhandled :exc:`SystemExit`, or ``1`` + for any other unhandled exception. If :c:member:`PyConfig.inspect` is set (such as when the :option:`-i` option is used), rather than returning when the interpreter exits, execution will instead resume in an interactive Python prompt (REPL) using the ``__main__`` module's global namespace. If the interpreter exited with an exception, it is immediately raised in the REPL session. The function return value is - then determined by the way the *REPL session* terminates: returning ``0`` - if the session terminates without raising an unhandled exception, exiting - immediately for an unhandled :exc:`SystemExit`, and returning ``1`` for - any other unhandled exception. - - This function always finalizes the Python interpreter regardless of whether - it returns a value or immediately exits the process due to an unhandled - :exc:`SystemExit` exception. + then determined by the way the *REPL session* terminates: ``0``, ``1``, or + the status of a :exc:`SystemExit`, as specified above. + + This function always finalizes the Python interpreter before it returns. See :ref:`Python Configuration ` for an example of a customized Python that always runs in isolated mode using From 62143736b623fd0bcf998995196a13c4e50bb778 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 11 Jun 2025 17:35:48 -0600 Subject: [PATCH 540/989] gh-134939: Add the concurrent.interpreters Module (gh-133958) PEP-734 has been accepted (for 3.14). (FTR, I'm opposed to putting this under the concurrent package, but doing so is the SC condition under which the module can land in 3.14.) --- .github/CODEOWNERS | 6 +- Doc/library/concurrency.rst | 1 + Doc/library/concurrent.interpreters.rst | 198 ++++++++++++++++++ Doc/library/concurrent.rst | 3 +- Doc/library/python.rst | 5 + Doc/whatsnew/3.14.rst | 98 +++++++++ Lib/concurrent/futures/interpreter.py | 2 +- .../interpreters/__init__.py | 19 +- .../interpreters/_crossinterp.py | 2 +- .../interpreters/_queues.py} | 0 .../support/{interpreters => }/channels.py | 4 +- Lib/test/test__interpchannels.py | 2 +- .../test_interpreter_pool.py | 2 +- Lib/test/test_interpreters/test_api.py | 26 +-- Lib/test/test_interpreters/test_channels.py | 16 +- Lib/test/test_interpreters/test_lifecycle.py | 4 +- Lib/test/test_interpreters/test_queues.py | 18 +- Lib/test/test_interpreters/test_stress.py | 2 +- Lib/test/test_interpreters/utils.py | 2 +- Lib/test/test_sys.py | 2 +- Lib/test/test_threading.py | 2 +- Lib/test/test_types.py | 9 +- Makefile.pre.in | 3 +- ...-05-30-09-46-21.gh-issue-134939.Pu3nnm.rst | 1 + Modules/_interpchannelsmodule.c | 26 ++- Modules/_interpqueuesmodule.c | 11 +- Modules/_testinternalcapi.c | 4 +- Python/crossinterp_exceptions.h | 6 +- 28 files changed, 389 insertions(+), 85 deletions(-) create mode 100644 Doc/library/concurrent.interpreters.rst rename Lib/{test/support => concurrent}/interpreters/__init__.py (95%) rename Lib/{test/support => concurrent}/interpreters/_crossinterp.py (98%) rename Lib/{test/support/interpreters/queues.py => concurrent/interpreters/_queues.py} (100%) rename Lib/test/support/{interpreters => }/channels.py (98%) create mode 100644 Misc/NEWS.d/next/Library/2025-05-30-09-46-21.gh-issue-134939.Pu3nnm.rst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 775d9c63260c83..63a28490043899 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -281,9 +281,13 @@ Doc/howto/clinic.rst @erlend-aasland # Subinterpreters **/*interpreteridobject.* @ericsnowcurrently **/*crossinterp* @ericsnowcurrently -Lib/test/support/interpreters/ @ericsnowcurrently Modules/_interp*module.c @ericsnowcurrently +Lib/test/test__interp*.py @ericsnowcurrently +Lib/concurrent/interpreters/ @ericsnowcurrently +Lib/test/support/channels.py @ericsnowcurrently +Doc/library/concurrent.interpreters.rst @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently +Lib/concurrent/futures/interpreter.py @ericsnowcurrently # Android **/*Android* @mhsmith @freakboy3742 diff --git a/Doc/library/concurrency.rst b/Doc/library/concurrency.rst index 5be1a1106b09a0..18f9443cbfea20 100644 --- a/Doc/library/concurrency.rst +++ b/Doc/library/concurrency.rst @@ -18,6 +18,7 @@ multitasking). Here's an overview: multiprocessing.shared_memory.rst concurrent.rst concurrent.futures.rst + concurrent.interpreters.rst subprocess.rst sched.rst queue.rst diff --git a/Doc/library/concurrent.interpreters.rst b/Doc/library/concurrent.interpreters.rst new file mode 100644 index 00000000000000..8860418e87a585 --- /dev/null +++ b/Doc/library/concurrent.interpreters.rst @@ -0,0 +1,198 @@ +:mod:`!concurrent.interpreters` --- Multiple interpreters in the same process +============================================================================= + +.. module:: concurrent.interpreters + :synopsis: Multiple interpreters in the same process + +.. moduleauthor:: Eric Snow +.. sectionauthor:: Eric Snow + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/concurrent/interpreters.py` + +-------------- + + +Introduction +------------ + +The :mod:`!concurrent.interpreters` module constructs higher-level +interfaces on top of the lower level :mod:`!_interpreters` module. + +.. XXX Add references to the upcoming HOWTO docs in the seealso block. + +.. seealso:: + + :ref:`isolating-extensions-howto` + how to update an extension module to support multiple interpreters + + :pep:`554` + + :pep:`734` + + :pep:`684` + +.. XXX Why do we disallow multiple interpreters on WASM? + +.. include:: ../includes/wasm-notavail.rst + + +Key details +----------- + +Before we dive into examples, there are a small number of details +to keep in mind about using multiple interpreters: + +* isolated, by default +* no implicit threads +* not all PyPI packages support use in multiple interpreters yet + +.. XXX Are there other relevant details to list? + +In the context of multiple interpreters, "isolated" means that +different interpreters do not share any state. In practice, there is some +process-global data they all share, but that is managed by the runtime. + + +Reference +--------- + +This module defines the following functions: + +.. function:: list_all() + + Return a :class:`list` of :class:`Interpreter` objects, + one for each existing interpreter. + +.. function:: get_current() + + Return an :class:`Interpreter` object for the currently running + interpreter. + +.. function:: get_main() + + Return an :class:`Interpreter` object for the main interpreter. + +.. function:: create() + + Initialize a new (idle) Python interpreter + and return a :class:`Interpreter` object for it. + + +Interpreter objects +^^^^^^^^^^^^^^^^^^^ + +.. class:: Interpreter(id) + + A single interpreter in the current process. + + Generally, :class:`Interpreter` shouldn't be called directly. + Instead, use :func:`create` or one of the other module functions. + + .. attribute:: id + + (read-only) + + The interpreter's ID. + + .. attribute:: whence + + (read-only) + + A string describing where the interpreter came from. + + .. method:: is_running() + + Return ``True`` if the interpreter is currently executing code + in its :mod:`!__main__` module and ``False`` otherwise. + + .. method:: close() + + Finalize and destroy the interpreter. + + .. method:: prepare_main(ns=None, **kwargs) + + Bind "shareable" objects in the interpreter's + :mod:`!__main__` module. + + .. method:: exec(code, /, dedent=True) + + Run the given source code in the interpreter (in the current thread). + + .. method:: call(callable, /, *args, **kwargs) + + Return the result of calling running the given function in the + interpreter (in the current thread). + + .. method:: call_in_thread(callable, /, *args, **kwargs) + + Run the given function in the interpreter (in a new thread). + +Exceptions +^^^^^^^^^^ + +.. exception:: InterpreterError + + This exception, a subclass of :exc:`Exception`, is raised when + an interpreter-related error happens. + +.. exception:: InterpreterNotFoundError + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the targeted interpreter no longer exists. + +.. exception:: ExecutionFailed + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the running code raised an uncaught exception. + + .. attribute:: excinfo + + A basic snapshot of the exception raised in the other interpreter. + +.. XXX Document the excinfoattrs? + +.. exception:: NotShareableError + + This exception, a subclass of :exc:`TypeError`, is raised when + an object cannot be sent to another interpreter. + + +.. XXX Add functions for communicating between interpreters. + + +Basic usage +----------- + +Creating an interpreter and running code in it:: + + from concurrent import interpreters + + interp = interpreters.create() + + # Run in the current OS thread. + + interp.exec('print("spam!")') + + interp.exec("""if True: + print('spam!') + """) + + from textwrap import dedent + interp.exec(dedent(""" + print('spam!') + """)) + + def run(): + print('spam!') + + interp.call(run) + + # Run in new OS thread. + + t = interp.call_in_thread(run) + t.join() + + +.. XXX Explain about object "sharing". diff --git a/Doc/library/concurrent.rst b/Doc/library/concurrent.rst index 8caea78bbb57e8..748c72c733bba2 100644 --- a/Doc/library/concurrent.rst +++ b/Doc/library/concurrent.rst @@ -1,6 +1,7 @@ The :mod:`!concurrent` package ============================== -Currently, there is only one module in this package: +This package contains the following modules: * :mod:`concurrent.futures` -- Launching parallel tasks +* :mod:`concurrent.interpreters` -- Multiple interpreters in the same process diff --git a/Doc/library/python.rst b/Doc/library/python.rst index c2c231af7c3033..c5c762e11b99e5 100644 --- a/Doc/library/python.rst +++ b/Doc/library/python.rst @@ -27,3 +27,8 @@ overview: inspect.rst annotationlib.rst site.rst + +.. seealso:: + + * See the :mod:`concurrent.interpreters` module, which similarly + exposes core runtime functionality. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 45e68aea5fb9a2..ca330a32b33c4b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -83,6 +83,7 @@ and improvements in user-friendliness and correctness. .. PEP-sized items next. * :ref:`PEP 649 and 749: deferred evaluation of annotations ` +* :ref:`PEP 734: Multiple Interpreters in the Stdlib ` * :ref:`PEP 741: Python Configuration C API ` * :ref:`PEP 750: Template strings ` * :ref:`PEP 758: Allow except and except* expressions without parentheses ` @@ -123,6 +124,101 @@ of Python. See :ref:`below ` for details. New features ============ +.. _whatsnew314-pep734: + +PEP 734: Multiple Interpreters in the Stdlib +-------------------------------------------- + +The CPython runtime supports running multiple copies of Python in the +same process simultaneously and has done so for over 20 years. +Each of these separate copies is called an "interpreter". +However, the feature had been available only through the C-API. + +That limitation is removed in the 3.14 release, +with the new :mod:`concurrent.interpreters` module. + +There are at least two notable reasons why using multiple interpreters +is worth considering: + +* they support a new (to Python), human-friendly concurrency model +* true multi-core parallelism + +For some use cases, concurrency in software enables efficiency and +can simplify software, at a high level. At the same time, implementing +and maintaining all but the simplest concurrency is often a struggle +for the human brain. That especially applies to plain threads +(for example, :mod:`threading`), where all memory is shared between all threads. + +With multiple isolated interpreters, you can take advantage of a class +of concurrency models, like CSP or the actor model, that have found +success in other programming languages, like Smalltalk, Erlang, +Haskell, and Go. Think of multiple interpreters like threads +but with opt-in sharing. + +Regarding multi-core parallelism: as of the 3.12 release, interpreters +are now sufficiently isolated from one another to be used in parallel. +(See :pep:`684`.) This unlocks a variety of CPU-intensive use cases +for Python that were limited by the :term:`GIL`. + +Using multiple interpreters is similar in many ways to +:mod:`multiprocessing`, in that they both provide isolated logical +"processes" that can run in parallel, with no sharing by default. +However, when using multiple interpreters, an application will use +fewer system resources and will operate more efficiently (since it +stays within the same process). Think of multiple interpreters as +having the isolation of processes with the efficiency of threads. + +.. XXX Add an example or two. +.. XXX Link to the not-yet-added HOWTO doc. + +While the feature has been around for decades, multiple interpreters +have not been used widely, due to low awareness and the lack of a stdlib +module. Consequently, they currently have several notable limitations, +which will improve significantly now that the feature is finally +going mainstream. + +Current limitations: + +* starting each interpreter has not been optimized yet +* each interpreter uses more memory than necessary + (we will be working next on extensive internal sharing between + interpreters) +* there aren't many options *yet* for truly sharing objects or other + data between interpreters (other than :type:`memoryview`) +* many extension modules on PyPI are not compatible with multiple + interpreters yet (stdlib extension modules *are* compatible) +* the approach to writing applications that use multiple isolated + interpreters is mostly unfamiliar to Python users, for now + +The impact of these limitations will depend on future CPython +improvements, how interpreters are used, and what the community solves +through PyPI packages. Depending on the use case, the limitations may +not have much impact, so try it out! + +Furthermore, future CPython releases will reduce or eliminate overhead +and provide utilities that are less appropriate on PyPI. In the +meantime, most of the limitations can also be addressed through +extension modules, meaning PyPI packages can fill any gap for 3.14, and +even back to 3.12 where interpreters were finally properly isolated and +stopped sharing the :term:`GIL`. Likewise, we expect to slowly see +libraries on PyPI for high-level abstractions on top of interpreters. + +Regarding extension modules, work is in progress to update some PyPI +projects, as well as tools like Cython, pybind11, nanobind, and PyO3. +The steps for isolating an extension module are found at +:ref:`isolating-extensions-howto`. Isolating a module has a lot of +overlap with what is required to support +:ref:`free-threading `, +so the ongoing work in the community in that area will help accelerate +support for multiple interpreters. + +Also added in 3.14: :ref:`concurrent.futures.InterpreterPoolExecutor +`. + +.. seealso:: + :pep:`734`. + + .. _whatsnew314-pep750: PEP 750: Template strings @@ -1109,6 +1205,8 @@ calendar concurrent.futures ------------------ +.. _whatsnew314-concurrent-futures-interp-pool: + * Add :class:`~concurrent.futures.InterpreterPoolExecutor`, which exposes "subinterpreters" (multiple Python interpreters in the same process) to Python code. This is separate from the proposed API diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index a2c4fbfd3fb831..f12b4ac33cda27 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -167,7 +167,7 @@ def run(self, task): except _interpqueues.QueueError: continue except ModuleNotFoundError: - # interpreters.queues doesn't exist, which means + # interpreters._queues doesn't exist, which means # QueueEmpty doesn't. Act as though it does. continue else: diff --git a/Lib/test/support/interpreters/__init__.py b/Lib/concurrent/interpreters/__init__.py similarity index 95% rename from Lib/test/support/interpreters/__init__.py rename to Lib/concurrent/interpreters/__init__.py index 6d1b0690805d2d..0fd661249a276c 100644 --- a/Lib/test/support/interpreters/__init__.py +++ b/Lib/concurrent/interpreters/__init__.py @@ -9,6 +9,10 @@ InterpreterError, InterpreterNotFoundError, NotShareableError, is_shareable, ) +from ._queues import ( + create as create_queue, + Queue, QueueEmpty, QueueFull, +) __all__ = [ @@ -20,21 +24,6 @@ ] -_queuemod = None - -def __getattr__(name): - if name in ('Queue', 'QueueEmpty', 'QueueFull', 'create_queue'): - global create_queue, Queue, QueueEmpty, QueueFull - ns = globals() - from .queues import ( - create as create_queue, - Queue, QueueEmpty, QueueFull, - ) - return ns[name] - else: - raise AttributeError(name) - - _EXEC_FAILURE_STR = """ {superstr} diff --git a/Lib/test/support/interpreters/_crossinterp.py b/Lib/concurrent/interpreters/_crossinterp.py similarity index 98% rename from Lib/test/support/interpreters/_crossinterp.py rename to Lib/concurrent/interpreters/_crossinterp.py index 544e197ba4c028..f47eb693ac861c 100644 --- a/Lib/test/support/interpreters/_crossinterp.py +++ b/Lib/concurrent/interpreters/_crossinterp.py @@ -61,7 +61,7 @@ def __new__(cls): def __repr__(self): return f'{self._MODULE}.{self._NAME}' -# return f'interpreters.queues.UNBOUND' +# return f'interpreters._queues.UNBOUND' UNBOUND = object.__new__(UnboundItem) diff --git a/Lib/test/support/interpreters/queues.py b/Lib/concurrent/interpreters/_queues.py similarity index 100% rename from Lib/test/support/interpreters/queues.py rename to Lib/concurrent/interpreters/_queues.py diff --git a/Lib/test/support/interpreters/channels.py b/Lib/test/support/channels.py similarity index 98% rename from Lib/test/support/interpreters/channels.py rename to Lib/test/support/channels.py index 1724759b75a999..b2de24d9d3e534 100644 --- a/Lib/test/support/interpreters/channels.py +++ b/Lib/test/support/channels.py @@ -2,14 +2,14 @@ import time import _interpchannels as _channels -from . import _crossinterp +from concurrent.interpreters import _crossinterp # aliases: from _interpchannels import ( ChannelError, ChannelNotFoundError, ChannelClosedError, # noqa: F401 ChannelEmptyError, ChannelNotEmptyError, # noqa: F401 ) -from ._crossinterp import ( +from concurrent.interpreters._crossinterp import ( UNBOUND_ERROR, UNBOUND_REMOVE, ) diff --git a/Lib/test/test__interpchannels.py b/Lib/test/test__interpchannels.py index 88eee03a3de93a..858d31a73cf4f4 100644 --- a/Lib/test/test__interpchannels.py +++ b/Lib/test/test__interpchannels.py @@ -9,7 +9,7 @@ from test.support import import_helper, skip_if_sanitizer _channels = import_helper.import_module('_interpchannels') -from test.support.interpreters import _crossinterp +from concurrent.interpreters import _crossinterp from test.test__interpreters import ( _interpreters, _run_output, diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index f6c62ae4b2021b..5fd5684e1035e9 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -8,10 +8,10 @@ from concurrent.futures.interpreter import ( ExecutionFailed, BrokenInterpreterPool, ) +from concurrent.interpreters import _queues as queues import _interpreters from test import support import test.test_asyncio.utils as testasyncio_utils -from test.support.interpreters import queues from .executor import ExecutorTest, mul from .util import BaseTestCase, InterpreterPoolMixin, setup_module diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index b3c9ef8efba37a..1403cd145b6787 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -13,11 +13,11 @@ from test.support import import_helper # Raise SkipTest if subinterpreters not supported. _interpreters = import_helper.import_module('_interpreters') +from concurrent import interpreters from test.support import Py_GIL_DISABLED -from test.support import interpreters from test.support import force_not_colorized import test._crossinterp_definitions as defs -from test.support.interpreters import ( +from concurrent.interpreters import ( InterpreterError, InterpreterNotFoundError, ExecutionFailed, ) from .utils import ( @@ -133,7 +133,7 @@ def test_in_subinterpreter(self): main, = interpreters.list_all() interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters interp = interpreters.create() print(interp.id) """)) @@ -196,7 +196,7 @@ def test_subinterpreter(self): main = interpreters.get_main() interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(cur.id) """)) @@ -213,7 +213,7 @@ def test_idempotent(self): with self.subTest('subinterpreter'): interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(id(cur)) cur = interpreters.get_current() @@ -225,7 +225,7 @@ def test_idempotent(self): with self.subTest('per-interpreter'): interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(id(cur)) """)) @@ -582,7 +582,7 @@ def test_from_current(self): main, = interpreters.list_all() interp = interpreters.create() out = _run_output(interp, dedent(f""" - from test.support import interpreters + from concurrent import interpreters interp = interpreters.Interpreter({interp.id}) try: interp.close() @@ -599,7 +599,7 @@ def test_from_sibling(self): self.assertEqual(set(interpreters.list_all()), {main, interp1, interp2}) interp1.exec(dedent(f""" - from test.support import interpreters + from concurrent import interpreters interp2 = interpreters.Interpreter({interp2.id}) interp2.close() interp3 = interpreters.create() @@ -806,7 +806,7 @@ def eggs(): ham() """) scriptfile = self.make_script('script.py', tempdir, text=""" - from test.support import interpreters + from concurrent import interpreters def script(): import spam @@ -827,7 +827,7 @@ def script(): ~~~~~~~~~~~^^^^^^^^ {interpmod_line.strip()} raise ExecutionFailed(excinfo) - test.support.interpreters.ExecutionFailed: RuntimeError: uh-oh! + concurrent.interpreters.ExecutionFailed: RuntimeError: uh-oh! Uncaught in the interpreter: @@ -1281,7 +1281,7 @@ def run(text): # no module indirection with self.subTest('no indirection'): text = run(f""" - from test.support import interpreters + from concurrent import interpreters def spam(): # This a global var... @@ -1301,7 +1301,7 @@ def run(interp, func): """) with self.subTest('indirect as func, direct interp'): text = run(f""" - from test.support import interpreters + from concurrent import interpreters import mymod def spam(): @@ -1317,7 +1317,7 @@ def spam(): # indirect as func, indirect interp new_mod('mymod', f""" - from test.support import interpreters + from concurrent import interpreters def run(func): interp = interpreters.create() return interp.call(func) diff --git a/Lib/test/test_interpreters/test_channels.py b/Lib/test/test_interpreters/test_channels.py index 0c027b17cea68c..109ddf344539ad 100644 --- a/Lib/test/test_interpreters/test_channels.py +++ b/Lib/test/test_interpreters/test_channels.py @@ -8,8 +8,8 @@ from test.support import import_helper # Raise SkipTest if subinterpreters not supported. _channels = import_helper.import_module('_interpchannels') -from test.support import interpreters -from test.support.interpreters import channels +from concurrent import interpreters +from test.support import channels from .utils import _run_output, TestBase @@ -171,7 +171,7 @@ def test_send_recv_main(self): def test_send_recv_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import channels + from test.support import channels r, s = channels.create() orig = b'spam' s.send_nowait(orig) @@ -244,7 +244,7 @@ def test_send_recv_nowait_main_with_default(self): def test_send_recv_nowait_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import channels + from test.support import channels r, s = channels.create() orig = b'spam' s.send_nowait(orig) @@ -387,7 +387,7 @@ def common(rch, sch, unbound=None, presize=0): interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import channels + from test.support import channels sch = channels.SendChannel({sch.id}) obj1 = b'spam' obj2 = b'eggs' @@ -482,7 +482,7 @@ def test_send_cleared_with_subinterpreter_mixed(self): self.assertEqual(_channels.get_count(rch.id), 0) _run_output(interp, dedent(f""" - from test.support.interpreters import channels + from test.support import channels sch = channels.SendChannel({sch.id}) sch.send_nowait(1, unbounditems=channels.UNBOUND) sch.send_nowait(2, unbounditems=channels.UNBOUND_ERROR) @@ -518,7 +518,7 @@ def test_send_cleared_with_subinterpreter_multiple(self): sch.send_nowait(1) _run_output(interp1, dedent(f""" - from test.support.interpreters import channels + from test.support import channels rch = channels.RecvChannel({rch.id}) sch = channels.SendChannel({sch.id}) obj1 = rch.recv() @@ -526,7 +526,7 @@ def test_send_cleared_with_subinterpreter_multiple(self): sch.send_nowait(obj1, unbounditems=channels.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" - from test.support.interpreters import channels + from test.support import channels rch = channels.RecvChannel({rch.id}) sch = channels.SendChannel({sch.id}) obj2 = rch.recv() diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index ac24f6568acd95..15537ac6cc8f82 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -119,7 +119,7 @@ def test_sys_path_0(self): # The main interpreter's sys.path[0] should be used by subinterpreters. script = ''' import sys - from test.support import interpreters + from concurrent import interpreters orig = sys.path[0] @@ -170,7 +170,7 @@ def test_gh_109793(self): # is reported, even when subinterpreters get cleaned up at the end. import subprocess argv = [sys.executable, '-c', '''if True: - from test.support import interpreters + from concurrent import interpreters interp = interpreters.create() raise Exception '''] diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index 757373904d7a43..3e982d76e86314 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -7,8 +7,8 @@ from test.support import import_helper, Py_DEBUG # Raise SkipTest if subinterpreters not supported. _queues = import_helper.import_module('_interpqueues') -from test.support import interpreters -from test.support.interpreters import queues, _crossinterp +from concurrent import interpreters +from concurrent.interpreters import _queues as queues, _crossinterp from .utils import _run_output, TestBase as _TestBase @@ -126,7 +126,7 @@ def test_shareable(self): interp = interpreters.create() interp.exec(dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue1 = queues.Queue({queue1.id}) """)); @@ -324,7 +324,7 @@ def test_put_get_full_fallback(self): def test_put_get_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.create() """)) for methname in ('get', 'get_nowait'): @@ -351,7 +351,7 @@ def test_put_get_different_interpreters(self): out = _run_output( interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue1 = queues.Queue({queue1.id}) queue2 = queues.Queue({queue2.id}) assert queue1.qsize() == 1, 'expected: queue1.qsize() == 1' @@ -390,7 +390,7 @@ def common(queue, unbound=None, presize=0): interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj1 = b'spam' obj2 = b'eggs' @@ -468,7 +468,7 @@ def test_put_cleared_with_subinterpreter_mixed(self): queue = queues.create() interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) queue.put(1, unbounditems=queues.UNBOUND) queue.put(2, unbounditems=queues.UNBOUND_ERROR) @@ -504,14 +504,14 @@ def test_put_cleared_with_subinterpreter_multiple(self): queue.put(1) _run_output(interp1, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj1 = queue.get() queue.put(2, unbounditems=queues.UNBOUND) queue.put(obj1, unbounditems=queues.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj2 = queue.get() obj1 = queue.get() diff --git a/Lib/test/test_interpreters/test_stress.py b/Lib/test/test_interpreters/test_stress.py index fae2f38cb5534b..e25e67a0d4f445 100644 --- a/Lib/test/test_interpreters/test_stress.py +++ b/Lib/test/test_interpreters/test_stress.py @@ -6,7 +6,7 @@ from test.support import threading_helper # Raise SkipTest if subinterpreters not supported. import_helper.import_module('_interpreters') -from test.support import interpreters +from concurrent import interpreters from .utils import TestBase diff --git a/Lib/test/test_interpreters/utils.py b/Lib/test/test_interpreters/utils.py index c25e0fb7475e7e..ae09aa457b48c7 100644 --- a/Lib/test/test_interpreters/utils.py +++ b/Lib/test/test_interpreters/utils.py @@ -21,7 +21,7 @@ import _interpreters except ImportError as exc: raise unittest.SkipTest(str(exc)) -from test.support import interpreters +from concurrent import interpreters try: diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index bf415894903e9b..39e62027f03e5a 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -24,7 +24,7 @@ from test.support import force_not_colorized from test.support import SHORT_TIMEOUT try: - from test.support import interpreters + from concurrent import interpreters except ImportError: interpreters = None import textwrap diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 59b3a749d2fffa..125c27446986c0 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -28,7 +28,7 @@ from test import support try: - from test.support import interpreters + from concurrent import interpreters except ImportError: interpreters = None diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 9011e0e1962820..a117413301bebe 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2513,15 +2513,16 @@ class SubinterpreterTests(unittest.TestCase): def setUpClass(cls): global interpreters try: - from test.support import interpreters + from concurrent import interpreters except ModuleNotFoundError: raise unittest.SkipTest('subinterpreters required') - import test.support.interpreters.channels # noqa: F401 + from test.support import channels # noqa: F401 + cls.create_channel = staticmethod(channels.create) @cpython_only @no_rerun('channels (and queues) might have a refleak; see gh-122199') def test_static_types_inherited_slots(self): - rch, sch = interpreters.channels.create() + rch, sch = self.create_channel() script = textwrap.dedent(""" import test.support @@ -2547,7 +2548,7 @@ def collate_results(raw): main_results = collate_results(raw) interp = interpreters.create() - interp.exec('from test.support import interpreters') + interp.exec('from concurrent import interpreters') interp.prepare_main(sch=sch) interp.exec(script) raw = rch.recv_nowait() diff --git a/Makefile.pre.in b/Makefile.pre.in index b5703fbe6ae974..66b34b779f27cb 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2514,7 +2514,7 @@ XMLLIBSUBDIRS= xml xml/dom xml/etree xml/parsers xml/sax LIBSUBDIRS= asyncio \ collections \ compression compression/_common compression/zstd \ - concurrent concurrent/futures \ + concurrent concurrent/futures concurrent/interpreters \ csv \ ctypes ctypes/macholib \ curses \ @@ -2573,7 +2573,6 @@ TESTSUBDIRS= idlelib/idle_test \ test/subprocessdata \ test/support \ test/support/_hypothesis_stubs \ - test/support/interpreters \ test/test_asyncio \ test/test_capi \ test/test_cext \ diff --git a/Misc/NEWS.d/next/Library/2025-05-30-09-46-21.gh-issue-134939.Pu3nnm.rst b/Misc/NEWS.d/next/Library/2025-05-30-09-46-21.gh-issue-134939.Pu3nnm.rst new file mode 100644 index 00000000000000..2bda69bff52156 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-30-09-46-21.gh-issue-134939.Pu3nnm.rst @@ -0,0 +1 @@ +Add the :mod:`concurrent.interpreters` module. See :pep:`734`. diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index ea2e5f99dfa308..ee5e2b005e0a5b 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -220,6 +220,22 @@ wait_for_lock(PyThread_type_lock mutex, PY_TIMEOUT_T timeout) return 0; } +static int +ensure_highlevel_module_loaded(void) +{ + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._channels"); + if (highlevel == NULL) { + PyErr_Clear(); + highlevel = PyImport_ImportModule("test.support.channels"); + if (highlevel == NULL) { + return -1; + } + } + Py_DECREF(highlevel); + return 0; +} + /* module state *************************************************************/ @@ -2742,15 +2758,9 @@ _get_current_channelend_type(int end) } if (cls == NULL) { // Force the module to be loaded, to register the type. - PyObject *highlevel = PyImport_ImportModule("interpreters.channels"); - if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.channels"); - if (highlevel == NULL) { - return NULL; - } + if (ensure_highlevel_module_loaded() < 0) { + return NULL; } - Py_DECREF(highlevel); if (end == CHANNEL_SEND) { cls = state->send_channel_type; } diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 71d8fd8716cd94..e22709d5119b7c 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -136,13 +136,10 @@ idarg_int64_converter(PyObject *arg, void *ptr) static int ensure_highlevel_module_loaded(void) { - PyObject *highlevel = PyImport_ImportModule("interpreters.queues"); + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._queues"); if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.queues"); - if (highlevel == NULL) { - return -1; - } + return -1; } Py_DECREF(highlevel); return 0; @@ -299,7 +296,7 @@ add_QueueError(PyObject *mod) { module_state *state = get_module_state(mod); -#define PREFIX "test.support.interpreters." +#define PREFIX "concurrent.interpreters." #define ADD_EXCTYPE(NAME, BASE, DOC) \ assert(state->NAME == NULL); \ if (add_exctype(mod, &state->NAME, PREFIX #NAME, DOC, BASE) < 0) { \ diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 845c218e679ad2..804cb4e4d1c8ee 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1788,9 +1788,9 @@ exec_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) /* To run some code in a sub-interpreter. -Generally you can use test.support.interpreters, +Generally you can use the interpreters module, but we keep this helper as a distinct implementation. -That's especially important for testing test.support.interpreters. +That's especially important for testing the interpreters module. */ static PyObject * run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index ca4ca1cf123e49..12cd61db1b6762 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -24,7 +24,7 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) static PyTypeObject _PyExc_InterpreterError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterError", + .tp_name = "concurrent.interpreters.InterpreterError", .tp_doc = PyDoc_STR("A cross-interpreter operation failed"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -37,7 +37,7 @@ PyObject *PyExc_InterpreterError = (PyObject *)&_PyExc_InterpreterError; static PyTypeObject _PyExc_InterpreterNotFoundError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterNotFoundError", + .tp_name = "concurrent.interpreters.InterpreterNotFoundError", .tp_doc = PyDoc_STR("An interpreter was not found"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -51,7 +51,7 @@ PyObject *PyExc_InterpreterNotFoundError = (PyObject *)&_PyExc_InterpreterNotFou static int _init_notshareableerror(exceptions_t *state) { - const char *name = "interpreters.NotShareableError"; + const char *name = "concurrent.interpreters.NotShareableError"; PyObject *base = PyExc_TypeError; PyObject *ns = NULL; PyObject *exctype = PyErr_NewException(name, base, ns); From 71f5fafdfb2e509f59cd584d45949c6496f88d41 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Thu, 12 Jun 2025 03:30:33 +0200 Subject: [PATCH 541/989] Fix presentation of dataclasses' `unsafe_hash` default value (#116532) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/dataclasses.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index f18c7cc9c02da6..299c8aa399c25c 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -121,8 +121,11 @@ Module contents :meth:`!__le__`, :meth:`!__gt__`, or :meth:`!__ge__`, then :exc:`TypeError` is raised. - - *unsafe_hash*: If ``False`` (the default), a :meth:`~object.__hash__` method - is generated according to how *eq* and *frozen* are set. + - *unsafe_hash*: If true, force ``dataclasses`` to create a + :meth:`~object.__hash__` method, even though it may not be safe to do so. + Otherwise, generate a :meth:`~object.__hash__` method according to how + *eq* and *frozen* are set. + The default value is ``False``. :meth:`!__hash__` is used by built-in :meth:`hash`, and when objects are added to hashed collections such as dictionaries and sets. Having a From d4471297586335d8c24db8b2c030d32c94570344 Mon Sep 17 00:00:00 2001 From: Gyeongjae Choi Date: Thu, 12 Jun 2025 13:04:13 +0900 Subject: [PATCH 542/989] gh-128627: Fix iPad detection in wasm-gc (#135388) On some iPad versions, Safari reports as "macOS". Modifies the GC trampoline detection to add a feature-based check to detect this case. --- Python/emscripten_trampoline.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index cc5047d6bda224..975c28eec104c6 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -71,7 +71,16 @@ EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { // ) function getPyEMCountArgsPtr() { - let isIOS = globalThis.navigator && /iPad|iPhone|iPod/.test(navigator.platform); + // Starting with iOS 18.3.1, WebKit on iOS has an issue with the garbage + // collector that breaks the call trampoline. See #130418 and + // https://bugs.webkit.org/show_bug.cgi?id=293113 for details. + let isIOS = globalThis.navigator && ( + /iPad|iPhone|iPod/.test(navigator.userAgent) || + // Starting with iPadOS 13, iPads might send a platform string that looks like a desktop Mac. + // To differentiate, we check if the platform is 'MacIntel' (common for Macs and newer iPads) + // AND if the device has multi-touch capabilities (navigator.maxTouchPoints > 1) + (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1) + ) if (isIOS) { return 0; } From e6c3039cb39e68ae9af9ddcaca341c5af8f9cf23 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 12 Jun 2025 07:11:05 -0400 Subject: [PATCH 543/989] gh-135410: use a critical section around `StringIO.__next__` (#135412) --- Lib/test/test_memoryio.py | 19 +++++++++++++++++++ ...-06-11-19-05-49.gh-issue-135410.E89Boi.rst | 2 ++ Modules/_io/stringio.c | 12 +++++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 63998a86c45b53..249e0f3ba32f29 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -5,6 +5,7 @@ import unittest from test import support +from test.support import threading_helper import gc import io @@ -12,6 +13,7 @@ import pickle import sys import weakref +import threading class IntLike: def __init__(self, num): @@ -723,6 +725,22 @@ def test_newline_argument(self): for newline in (None, "", "\n", "\r", "\r\n"): self.ioclass(newline=newline) + @unittest.skipUnless(support.Py_GIL_DISABLED, "only meaningful under free-threading") + @threading_helper.requires_working_threading() + def test_concurrent_use(self): + memio = self.ioclass("") + + def use(): + memio.write("x" * 10) + memio.readlines() + + threads = [threading.Thread(target=use) for _ in range(8)] + with threading_helper.catch_threading_exception() as cm: + with threading_helper.start_threads(threads): + pass + + self.assertIsNone(cm.exc_value) + class PyStringIOTest(MemoryTestMixin, MemorySeekTestMixin, TextIOTestMixin, unittest.TestCase): @@ -890,6 +908,7 @@ def test_setstate(self): self.assertRaises(ValueError, memio.__setstate__, ("closed", "", 0, None)) + class CStringIOPickleTest(PyStringIOPickleTest): UnsupportedOperation = io.UnsupportedOperation diff --git a/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst b/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst new file mode 100644 index 00000000000000..a5917fba3f7bb9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst @@ -0,0 +1,2 @@ +Fix a crash when iterating over :class:`io.StringIO` on the :term:`free +threaded ` build. diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 56913fafefba8b..8482c268176c5b 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -404,7 +404,7 @@ _io_StringIO_readline_impl(stringio *self, Py_ssize_t size) } static PyObject * -stringio_iternext(PyObject *op) +stringio_iternext_lock_held(PyObject *op) { PyObject *line; stringio *self = stringio_CAST(op); @@ -441,6 +441,16 @@ stringio_iternext(PyObject *op) return line; } +static PyObject * +stringio_iternext(PyObject *op) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(op); + res = stringio_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return res; +} + /*[clinic input] @critical_section _io.StringIO.truncate From e7a3c20b925bbe7a71707caa5a648033f774b8ca Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 12 Jun 2025 16:28:30 +0200 Subject: [PATCH 544/989] gh-133390: Support SQL keyword completion for sqlite3 CLI (GH-133393) (GH-135292) Co-authored-by: Tan Long --- Doc/whatsnew/3.15.rst | 7 ++ Lib/sqlite3/__main__.py | 11 +- Lib/sqlite3/_completer.py | 42 +++++++ Lib/test/test_sqlite3/test_cli.py | 111 ++++++++++++++++++ Misc/ACKS | 1 + ...-05-05-03-14-08.gh-issue-133390.AuTggn.rst | 1 + Modules/_sqlite/module.c | 39 ++++++ 7 files changed, 206 insertions(+), 6 deletions(-) create mode 100644 Lib/sqlite3/_completer.py create mode 100644 Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 88e7462f688e70..9f327cf904da1b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -134,6 +134,13 @@ shelve (Contributed by Andrea Oliveri in :gh:`134004`.) +sqlite3 +------- + +* Support SQL keyword completion in the :mod:`sqlite3` command-line interface. + (Contributed by Long Tan in :gh:`133393`.) + + ssl --- diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index c2fa23c46cf990..9e74b49ee828bc 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -12,6 +12,8 @@ from textwrap import dedent from _colorize import get_theme, theme_no_color +from ._completer import completer + def execute(c, sql, suppress_errors=True, theme=theme_no_color): """Helper that wraps execution of SQL code. @@ -136,12 +138,9 @@ def main(*args): execute(con, args.sql, suppress_errors=False, theme=theme) else: # No SQL provided; start the REPL. - console = SqliteInteractiveConsole(con, use_color=True) - try: - import readline # noqa: F401 - except ImportError: - pass - console.interact(banner, exitmsg="") + with completer(): + console = SqliteInteractiveConsole(con, use_color=True) + console.interact(banner, exitmsg="") finally: con.close() diff --git a/Lib/sqlite3/_completer.py b/Lib/sqlite3/_completer.py new file mode 100644 index 00000000000000..f21ef69cad6439 --- /dev/null +++ b/Lib/sqlite3/_completer.py @@ -0,0 +1,42 @@ +from contextlib import contextmanager + +try: + from _sqlite3 import SQLITE_KEYWORDS +except ImportError: + SQLITE_KEYWORDS = () + +_completion_matches = [] + + +def _complete(text, state): + global _completion_matches + + if state == 0: + text_upper = text.upper() + _completion_matches = [c for c in SQLITE_KEYWORDS if c.startswith(text_upper)] + try: + return _completion_matches[state] + " " + except IndexError: + return None + + +@contextmanager +def completer(): + try: + import readline + except ImportError: + yield + return + + old_completer = readline.get_completer() + try: + readline.set_completer(_complete) + if readline.backend == "editline": + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = "tab: complete" + readline.parse_and_bind(command_string) + yield + finally: + readline.set_completer(old_completer) diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 37e0f74f688659..d993e28c4bb3a6 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -1,14 +1,22 @@ """sqlite3 CLI tests.""" import sqlite3 +import sys +import textwrap import unittest +import unittest.mock +import os from sqlite3.__main__ import main as cli +from test.support.import_helper import import_module from test.support.os_helper import TESTFN, unlink +from test.support.pty_helper import run_pty from test.support import ( captured_stdout, captured_stderr, captured_stdin, force_not_colorized_test_class, + requires_subprocess, + verbose, ) @@ -200,5 +208,108 @@ def test_color(self): self.assertIn('\x1b[1;35mOperationalError (SQLITE_ERROR)\x1b[0m: ' '\x1b[35mnear "sel": syntax error\x1b[0m', err) + +@requires_subprocess() +@force_not_colorized_test_class +class Completion(unittest.TestCase): + PS1 = "sqlite> " + + @classmethod + def setUpClass(cls): + _sqlite3 = import_module("_sqlite3") + if not hasattr(_sqlite3, "SQLITE_KEYWORDS"): + raise unittest.SkipTest("unable to determine SQLite keywords") + + readline = import_module("readline") + if readline.backend == "editline": + raise unittest.SkipTest("libedit readline is not supported") + + def write_input(self, input_, env=None): + script = textwrap.dedent(""" + import readline + from sqlite3.__main__ import main + + readline.parse_and_bind("set colored-completion-prefix off") + main() + """) + return run_pty(script, input_, env) + + def test_complete_sql_keywords(self): + # List candidates starting with 'S', there should be multiple matches. + input_ = b"S\t\tEL\t 1;\n.quit\n" + output = self.write_input(input_) + self.assertIn(b"SELECT", output) + self.assertIn(b"SET", output) + self.assertIn(b"SAVEPOINT", output) + self.assertIn(b"(1,)", output) + + # Keywords are completed in upper case for even lower case user input. + input_ = b"sel\t\t 1;\n.quit\n" + output = self.write_input(input_) + self.assertIn(b"SELECT", output) + self.assertIn(b"(1,)", output) + + @unittest.skipIf(sys.platform.startswith("freebsd"), + "Two actual tabs are inserted when there are no matching" + " completions in the pseudo-terminal opened by run_pty()" + " on FreeBSD") + def test_complete_no_match(self): + input_ = b"xyzzy\t\t\b\b\b\b\b\b\b.quit\n" + # Set NO_COLOR to disable coloring for self.PS1. + output = self.write_input(input_, env={**os.environ, "NO_COLOR": "1"}) + lines = output.decode().splitlines() + indices = ( + i for i, line in enumerate(lines, 1) + if line.startswith(f"{self.PS1}xyzzy") + ) + line_num = next(indices, -1) + self.assertNotEqual(line_num, -1) + # Completions occupy lines, assert no extra lines when there is nothing + # to complete. + self.assertEqual(line_num, len(lines)) + + def test_complete_no_input(self): + from _sqlite3 import SQLITE_KEYWORDS + + script = textwrap.dedent(""" + import readline + from sqlite3.__main__ import main + + # Configure readline to ...: + # - hide control sequences surrounding each candidate + # - hide "Display all xxx possibilities? (y or n)" + # - hide "--More--" + # - show candidates one per line + readline.parse_and_bind("set colored-completion-prefix off") + readline.parse_and_bind("set colored-stats off") + readline.parse_and_bind("set completion-query-items 0") + readline.parse_and_bind("set page-completions off") + readline.parse_and_bind("set completion-display-width 0") + readline.parse_and_bind("set show-all-if-ambiguous off") + readline.parse_and_bind("set show-all-if-unmodified off") + + main() + """) + input_ = b"\t\t.quit\n" + output = run_pty(script, input_, env={**os.environ, "NO_COLOR": "1"}) + try: + lines = output.decode().splitlines() + indices = [ + i for i, line in enumerate(lines) + if line.startswith(self.PS1) + ] + self.assertEqual(len(indices), 2) + start, end = indices + candidates = [l.strip() for l in lines[start+1:end]] + self.assertEqual(candidates, sorted(SQLITE_KEYWORDS)) + except: + if verbose: + print(' PTY output: '.center(30, '-')) + print(output.decode(errors='replace')) + print(' end PTY output '.center(30, '-')) + raise + + + if __name__ == "__main__": unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index 0be31560387ccb..d4557a03eb5400 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1869,6 +1869,7 @@ Neil Tallim Geoff Talvola Anish Tambe Musashi Tamura +Long Tan William Tanksley Christian Tanzer Steven Taschuk diff --git a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst new file mode 100644 index 00000000000000..38d5c311b1d437 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst @@ -0,0 +1 @@ +Support keyword completion in the :mod:`sqlite3` command-line interface. diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c index 909ddd1f990e19..5464fd1227ad20 100644 --- a/Modules/_sqlite/module.c +++ b/Modules/_sqlite/module.c @@ -32,6 +32,7 @@ #include "microprotocols.h" #include "row.h" #include "blob.h" +#include "util.h" #if SQLITE_VERSION_NUMBER < 3015002 #error "SQLite 3.15.2 or higher required" @@ -404,6 +405,40 @@ pysqlite_error_name(int rc) return NULL; } +static int +add_keyword_tuple(PyObject *module) +{ +#if SQLITE_VERSION_NUMBER >= 3024000 + int count = sqlite3_keyword_count(); + PyObject *keywords = PyTuple_New(count); + if (keywords == NULL) { + return -1; + } + for (int i = 0; i < count; i++) { + const char *keyword; + int size; + int result = sqlite3_keyword_name(i, &keyword, &size); + if (result != SQLITE_OK) { + pysqlite_state *state = pysqlite_get_state(module); + set_error_from_code(state, result); + goto error; + } + PyObject *kwd = PyUnicode_FromStringAndSize(keyword, size); + if (!kwd) { + goto error; + } + PyTuple_SET_ITEM(keywords, i, kwd); + } + return PyModule_Add(module, "SQLITE_KEYWORDS", keywords); + +error: + Py_DECREF(keywords); + return -1; +#else + return 0; +#endif +} + static int add_integer_constants(PyObject *module) { #define ADD_INT(ival) \ @@ -702,6 +737,10 @@ module_exec(PyObject *module) goto error; } + if (add_keyword_tuple(module) < 0) { + goto error; + } + if (PyModule_AddStringConstant(module, "sqlite_version", sqlite3_libversion())) { goto error; } From f273fd77d790300506c6443baa94d027b643f603 Mon Sep 17 00:00:00 2001 From: Eric Hanchrow Date: Thu, 12 Jun 2025 10:47:12 -0700 Subject: [PATCH 545/989] doc: Remove what was essentially duplicate wording. (GH-135431) --- Doc/library/logging.config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index 0e9dc33ae2123a..f8c71005a53028 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -548,7 +548,7 @@ mnemonic that the corresponding value is a callable. The ``filters`` member of ``handlers`` and ``loggers`` can take filter instances in addition to ids. -You can also specify a special key ``'.'`` whose value is a dictionary is a +You can also specify a special key ``'.'`` whose value is a mapping of attribute names to values. If found, the specified attributes will be set on the user-defined object before it is returned. Thus, with the following configuration:: From b03309fe5fca2eef51bf739fb13d9acef70cb964 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 12 Jun 2025 14:46:47 -0700 Subject: [PATCH 546/989] gh-135429: Fix the argument mismatch in lsprof throw event (#135442) --- Lib/test/test_cprofile.py | 19 +++++----- ...-06-12-18-15-31.gh-issue-135429.mch75_.rst | 1 + Modules/_lsprof.c | 24 ++++++++++++- Modules/clinic/_lsprof.c.h | 35 ++++++++++++++++++- 4 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-12-18-15-31.gh-issue-135429.mch75_.rst diff --git a/Lib/test/test_cprofile.py b/Lib/test/test_cprofile.py index 192c8eab26ebff..57e818b1c68b38 100644 --- a/Lib/test/test_cprofile.py +++ b/Lib/test/test_cprofile.py @@ -125,21 +125,22 @@ def test_throw(self): """ gh-106152 generator.throw() should trigger a call in cProfile - In the any() call below, there should be two entries for the generator: - * one for the call to __next__ which gets a True and terminates any - * one when the generator is garbage collected which will effectively - do a throw. """ + + def gen(): + yield + pr = self.profilerclass() pr.enable() - any(a == 1 for a in (1, 2)) + g = gen() + try: + g.throw(SyntaxError) + except SyntaxError: + pass pr.disable() pr.create_stats() - for func, (cc, nc, _, _, _) in pr.stats.items(): - if func[2] == "": - self.assertEqual(cc, 1) - self.assertEqual(nc, 1) + self.assertTrue(any("throw" in func[2] for func in pr.stats.keys())), def test_bad_descriptor(self): # gh-132250 diff --git a/Misc/NEWS.d/next/Library/2025-06-12-18-15-31.gh-issue-135429.mch75_.rst b/Misc/NEWS.d/next/Library/2025-06-12-18-15-31.gh-issue-135429.mch75_.rst new file mode 100644 index 00000000000000..b5213520a957f3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-12-18-15-31.gh-issue-135429.mch75_.rst @@ -0,0 +1 @@ +Fix the argument mismatch in ``_lsprof`` for ``PY_THROW`` event. diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index bbad5eb69032da..d0074b2a0d1f4d 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -631,6 +631,27 @@ _lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, Py_RETURN_NONE; } +/*[clinic input] +_lsprof.Profiler._pythrow_callback + + code: object + instruction_offset: object + exception: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception) +/*[clinic end generated code: output=0a32988919dfb94c input=fd728fc2c074f5e6]*/ +{ + ptrace_enter_call((PyObject*)self, (void *)code, code); + + Py_RETURN_NONE; +} + /*[clinic input] _lsprof.Profiler._pyreturn_callback @@ -747,7 +768,7 @@ static const struct { } callback_table[] = { {PY_MONITORING_EVENT_PY_START, "_pystart_callback"}, {PY_MONITORING_EVENT_PY_RESUME, "_pystart_callback"}, - {PY_MONITORING_EVENT_PY_THROW, "_pystart_callback"}, + {PY_MONITORING_EVENT_PY_THROW, "_pythrow_callback"}, {PY_MONITORING_EVENT_PY_RETURN, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_YIELD, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_UNWIND, "_pyreturn_callback"}, @@ -1002,6 +1023,7 @@ static PyMethodDef profiler_methods[] = { _LSPROF_PROFILER_DISABLE_METHODDEF _LSPROF_PROFILER_CLEAR_METHODDEF _LSPROF_PROFILER__PYSTART_CALLBACK_METHODDEF + _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF _LSPROF_PROFILER__PYRETURN_CALLBACK_METHODDEF _LSPROF_PROFILER__CCALL_CALLBACK_METHODDEF _LSPROF_PROFILER__CRETURN_CALLBACK_METHODDEF diff --git a/Modules/clinic/_lsprof.c.h b/Modules/clinic/_lsprof.c.h index 2918a6bc7abe74..c426cd6fe02f39 100644 --- a/Modules/clinic/_lsprof.c.h +++ b/Modules/clinic/_lsprof.c.h @@ -82,6 +82,39 @@ _lsprof_Profiler__pystart_callback(PyObject *self, PyObject *const *args, Py_ssi return return_value; } +PyDoc_STRVAR(_lsprof_Profiler__pythrow_callback__doc__, +"_pythrow_callback($self, code, instruction_offset, exception, /)\n" +"--\n" +"\n"); + +#define _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF \ + {"_pythrow_callback", _PyCFunction_CAST(_lsprof_Profiler__pythrow_callback), METH_FASTCALL, _lsprof_Profiler__pythrow_callback__doc__}, + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception); + +static PyObject * +_lsprof_Profiler__pythrow_callback(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *exception; + + if (!_PyArg_CheckPositional("_pythrow_callback", nargs, 3, 3)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + exception = args[2]; + return_value = _lsprof_Profiler__pythrow_callback_impl((ProfilerObject *)self, code, instruction_offset, exception); + +exit: + return return_value; +} + PyDoc_STRVAR(_lsprof_Profiler__pyreturn_callback__doc__, "_pyreturn_callback($self, code, instruction_offset, retval, /)\n" "--\n" @@ -411,4 +444,4 @@ profiler_init(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=fe231309776df7a7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9e46985561166c37 input=a9049054013a1b77]*/ From 73431356d324842cac0d545018c3ad21c85bc883 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 12 Jun 2025 18:32:27 -0400 Subject: [PATCH 547/989] Revert "gh-135410: use a critical section around `StringIO.__next__` (#135412)" (#135439) This reverts commit e6c3039cb39e68ae9af9ddcaca341c5af8f9cf23. --- Lib/test/test_memoryio.py | 19 ------------------- ...-06-11-19-05-49.gh-issue-135410.E89Boi.rst | 2 -- Modules/_io/stringio.c | 12 +----------- 3 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 249e0f3ba32f29..63998a86c45b53 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -5,7 +5,6 @@ import unittest from test import support -from test.support import threading_helper import gc import io @@ -13,7 +12,6 @@ import pickle import sys import weakref -import threading class IntLike: def __init__(self, num): @@ -725,22 +723,6 @@ def test_newline_argument(self): for newline in (None, "", "\n", "\r", "\r\n"): self.ioclass(newline=newline) - @unittest.skipUnless(support.Py_GIL_DISABLED, "only meaningful under free-threading") - @threading_helper.requires_working_threading() - def test_concurrent_use(self): - memio = self.ioclass("") - - def use(): - memio.write("x" * 10) - memio.readlines() - - threads = [threading.Thread(target=use) for _ in range(8)] - with threading_helper.catch_threading_exception() as cm: - with threading_helper.start_threads(threads): - pass - - self.assertIsNone(cm.exc_value) - class PyStringIOTest(MemoryTestMixin, MemorySeekTestMixin, TextIOTestMixin, unittest.TestCase): @@ -908,7 +890,6 @@ def test_setstate(self): self.assertRaises(ValueError, memio.__setstate__, ("closed", "", 0, None)) - class CStringIOPickleTest(PyStringIOPickleTest): UnsupportedOperation = io.UnsupportedOperation diff --git a/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst b/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst deleted file mode 100644 index a5917fba3f7bb9..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-06-11-19-05-49.gh-issue-135410.E89Boi.rst +++ /dev/null @@ -1,2 +0,0 @@ -Fix a crash when iterating over :class:`io.StringIO` on the :term:`free -threaded ` build. diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 8482c268176c5b..56913fafefba8b 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -404,7 +404,7 @@ _io_StringIO_readline_impl(stringio *self, Py_ssize_t size) } static PyObject * -stringio_iternext_lock_held(PyObject *op) +stringio_iternext(PyObject *op) { PyObject *line; stringio *self = stringio_CAST(op); @@ -441,16 +441,6 @@ stringio_iternext_lock_held(PyObject *op) return line; } -static PyObject * -stringio_iternext(PyObject *op) -{ - PyObject *res; - Py_BEGIN_CRITICAL_SECTION(op); - res = stringio_iternext_lock_held(op); - Py_END_CRITICAL_SECTION(); - return res; -} - /*[clinic input] @critical_section _io.StringIO.truncate From 2b0c684e0759dc3fec0e9dd0fc8383e6c75b7b5c Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Thu, 12 Jun 2025 18:11:08 -0500 Subject: [PATCH 548/989] GH-134273: Allow setting JIT compiler flags at build time with CFLAGS_JIT (GH134276) --- .../Build/2025-05-19-18-09-20.gh-issue-134273.ZAliyy.rst | 1 + Tools/jit/_targets.py | 5 +++++ Tools/jit/build.py | 4 ++++ configure | 2 +- configure.ac | 2 +- 5 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-19-18-09-20.gh-issue-134273.ZAliyy.rst diff --git a/Misc/NEWS.d/next/Build/2025-05-19-18-09-20.gh-issue-134273.ZAliyy.rst b/Misc/NEWS.d/next/Build/2025-05-19-18-09-20.gh-issue-134273.ZAliyy.rst new file mode 100644 index 00000000000000..3eb13cefbe6dfc --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-19-18-09-20.gh-issue-134273.ZAliyy.rst @@ -0,0 +1 @@ +Add support for configuring compiler flags for the JIT with ``CFLAGS_JIT`` diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index d0a1c081ffecc2..b383e39da19456 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -10,6 +10,7 @@ import sys import tempfile import typing +import shlex import _llvm import _schema @@ -46,6 +47,7 @@ class _Target(typing.Generic[_S, _R]): stable: bool = False debug: bool = False verbose: bool = False + cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() @@ -62,6 +64,7 @@ def _compute_digest(self) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) + hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) @@ -155,6 +158,8 @@ async def _compile( f"{o}", f"{c}", *self.args, + # Allow user-provided CFLAGS to override any defaults + *shlex.split(self.cflags), ] await _llvm.run("clang", args, echo=self.verbose) return await self._parse(o) diff --git a/Tools/jit/build.py b/Tools/jit/build.py index 1afd0c76bad1d2..a0733005929bf2 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -39,11 +39,15 @@ parser.add_argument( "-v", "--verbose", action="store_true", help="echo commands as they are run" ) + parser.add_argument( + "--cflags", help="additional flags to pass to the compiler", default="" + ) args = parser.parse_args() for target in args.target: target.debug = args.debug target.force = args.force target.verbose = args.verbose + target.cflags = args.cflags target.pyconfig_dir = args.pyconfig_dir target.build( comment=comment, diff --git a/configure b/configure index 029bf527da4e3d..fef9f2d7da935c 100755 --- a/configure +++ b/configure @@ -10863,7 +10863,7 @@ then : else case e in #( e) as_fn_append CFLAGS_NODIST " $jit_flags" - REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir ." + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\"" JIT_STENCILS_H="jit_stencils.h" if test "x$Py_DEBUG" = xtrue then : diff --git a/configure.ac b/configure.ac index 371b2e8ed73525..cc37a636c522ba 100644 --- a/configure.ac +++ b/configure.ac @@ -2776,7 +2776,7 @@ AS_VAR_IF([jit_flags], [], [AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"]) AS_VAR_SET([REGEN_JIT_COMMAND], - ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir ."]) + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\""]) AS_VAR_SET([JIT_STENCILS_H], ["jit_stencils.h"]) AS_VAR_IF([Py_DEBUG], [true], From 747d390045036325a7dbce12f7f0a4bc9c84c68a Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 13 Jun 2025 09:30:44 +0300 Subject: [PATCH 549/989] gh-135308: clarify math.issubnormal() description (GH-135324) --- Doc/library/math.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index c8061fb16380cd..ecb1d4102cac31 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -387,8 +387,8 @@ Floating point manipulation functions .. function:: issubnormal(x) Return ``True`` if *x* is a subnormal number, that is a finite - nonzero number with a magnitude smaller than the smallest positive normal - number, see :data:`sys.float_info.min`. Return ``False`` otherwise. + nonzero number with a magnitude smaller than :data:`sys.float_info.min`. + Return ``False`` otherwise. .. versionadded:: next From 273459562e3eaee4dd242fe8ee098cfdb4eebe9b Mon Sep 17 00:00:00 2001 From: Blaise Pabon Date: Fri, 13 Jun 2025 11:32:07 +0000 Subject: [PATCH 550/989] gh-106318: Add example for `str.encode()` (#134520) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/stdtypes.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b75e5ceecf874e..4854a2c077bb28 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1841,6 +1841,14 @@ expression support in the :mod:`re` module). unless an encoding error actually occurs, :ref:`devmode` is enabled or a :ref:`debug build ` is used. + For example:: + + >>> encoded_str_to_bytes = 'Python'.encode() + >>> type(encoded_str_to_bytes) + + >>> encoded_str_to_bytes + b'Python' + .. versionchanged:: 3.1 Added support for keyword arguments. From eed827ed091c6e55f11164046d287a76e30fbc0e Mon Sep 17 00:00:00 2001 From: Blaise Pabon Date: Fri, 13 Jun 2025 11:54:16 +0000 Subject: [PATCH 551/989] gh-106318: Add example for `str.endswith()` (#134523) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/stdtypes.rst | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 4854a2c077bb28..0954e3409a2dcc 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1863,7 +1863,19 @@ expression support in the :mod:`re` module). Return ``True`` if the string ends with the specified *suffix*, otherwise return ``False``. *suffix* can also be a tuple of suffixes to look for. With optional *start*, test beginning at that position. With optional *end*, stop comparing - at that position. + at that position. Using *start* and *end* is equivalent to + ``str[start:end].endswith(suffix)``. For example:: + + >>> 'Python'.endswith('on') + True + >>> 'a tuple of suffixes'.endswith(('at', 'in')) + False + >>> 'a tuple of suffixes'.endswith(('at', 'es')) + True + >>> 'Python is amazing'.endswith('is', 0, 9) + True + + See also :meth:`startswith` and :meth:`removesuffix`. .. method:: str.expandtabs(tabsize=8) From c646846c1e8f60e52b636b68bfd0df5e84229098 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 13 Jun 2025 16:32:26 +0300 Subject: [PATCH 552/989] gh-126703: Add freelist for PyComplexObject's (gh-135233) --- Include/internal/pycore_freelist_state.h | 2 ++ Objects/complexobject.c | 28 +++++++++++++++++++----- Objects/object.c | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 4828dfd948f70a..59beb92f3f7b9c 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -16,6 +16,7 @@ extern "C" { # define Py_dicts_MAXFREELIST 80 # define Py_dictkeys_MAXFREELIST 80 # define Py_floats_MAXFREELIST 100 +# define Py_complexes_MAXFREELIST 100 # define Py_ints_MAXFREELIST 100 # define Py_slices_MAXFREELIST 1 # define Py_ranges_MAXFREELIST 6 @@ -43,6 +44,7 @@ struct _Py_freelist { struct _Py_freelists { struct _Py_freelist floats; + struct _Py_freelist complexes; struct _Py_freelist ints; struct _Py_freelist tuples[PyTuple_MAXSAVESIZE]; struct _Py_freelist lists; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index c2dd320ae73988..b66ebe131ae605 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -1,4 +1,3 @@ - /* Complex object implementation */ /* Borrows heavily from floatobject.c */ @@ -9,6 +8,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter() #include "pycore_floatobject.h" // _Py_convert_int_to_double() +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_object.h" // _PyObject_Init() #include "pycore_pymath.h" // _Py_ADJUST_ERANGE2() @@ -410,16 +410,32 @@ complex_subtype_from_c_complex(PyTypeObject *type, Py_complex cval) PyObject * PyComplex_FromCComplex(Py_complex cval) { - /* Inline PyObject_New */ - PyComplexObject *op = PyObject_Malloc(sizeof(PyComplexObject)); + PyComplexObject *op = _Py_FREELIST_POP(PyComplexObject, complexes); + if (op == NULL) { - return PyErr_NoMemory(); + /* Inline PyObject_New */ + op = PyObject_Malloc(sizeof(PyComplexObject)); + if (op == NULL) { + return PyErr_NoMemory(); + } + _PyObject_Init((PyObject*)op, &PyComplex_Type); } - _PyObject_Init((PyObject*)op, &PyComplex_Type); op->cval = cval; return (PyObject *) op; } +static void +complex_dealloc(PyObject *op) +{ + assert(PyComplex_Check(op)); + if (PyComplex_CheckExact(op)) { + _Py_FREELIST_FREE(complexes, op, PyObject_Free); + } + else { + Py_TYPE(op)->tp_free(op); + } +} + static PyObject * complex_subtype_from_doubles(PyTypeObject *type, double real, double imag) { @@ -1383,7 +1399,7 @@ PyTypeObject PyComplex_Type = { "complex", sizeof(PyComplexObject), 0, - 0, /* tp_dealloc */ + complex_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Objects/object.c b/Objects/object.c index 9fe61ba7f1593a..eff3a9862129a2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -925,6 +925,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) // In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear() // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() clear_freelist(&freelists->floats, is_finalization, free_object); + clear_freelist(&freelists->complexes, is_finalization, free_object); for (Py_ssize_t i = 0; i < PyTuple_MAXSAVESIZE; i++) { clear_freelist(&freelists->tuples[i], is_finalization, free_object); } From 394d7985da50189e123d0908e9a282f6da31ed78 Mon Sep 17 00:00:00 2001 From: LamentXU <108666168+LamentXU123@users.noreply.github.com> Date: Fri, 13 Jun 2025 22:06:11 +0800 Subject: [PATCH 553/989] gh-135244: improve wording of `uuid8` docs about CSPRNG (#135433) --- Doc/library/uuid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 747ee3ee0e1951..92d58024e84d4e 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -257,7 +257,7 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. - By default, *a*, *b* and *c* are generated by a non-cryptographically + By default, *a*, *b* and *c* are not generated by a cryptographically secure pseudo-random number generator (CSPRNG). Use :func:`uuid4` when a UUID needs to be used in a security-sensitive context. From f4bc3a932082411243da9bb909841138ee2eea97 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Jun 2025 16:39:35 +0200 Subject: [PATCH 554/989] gh-134160: Split extension module init from PyModule docs; emphasize multi-phase init (GH-135126) Document behaviour of single-phase init. Call it "legacy". Reorganize PyModule docs. Move PyInit_modulename docs from the tutorial to reference documentation. Move PyMODINIT_FUNC docs from generic macros to the new page. Add doc stubs for `PYTHON_API_VERSION` & `PYTHON_ABI_VERSION` Remove incorrect refcounts.dat entry for `PyModuleDef_Init`. This removes the "Return value: Borrowed reference." note. Instead, note that the function sometimes returns a borrowed reference, sometimes as strong one. (IMO, it's best to not think of `PyModuleDef` as a `PyObject` at all, and act like it can't be reference-counted.) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/c-api/extension-modules.rst | 247 +++++++++++++++++++++++++++++ Doc/c-api/index.rst | 1 + Doc/c-api/intro.rst | 26 +-- Doc/c-api/module.rst | 271 +++++++++++++++----------------- Doc/data/refcounts.dat | 3 - Doc/extending/building.rst | 49 ++---- 6 files changed, 387 insertions(+), 210 deletions(-) create mode 100644 Doc/c-api/extension-modules.rst diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst new file mode 100644 index 00000000000000..4c8212f2f5e7f7 --- /dev/null +++ b/Doc/c-api/extension-modules.rst @@ -0,0 +1,247 @@ +.. highlight:: c + +.. _extension-modules: + +Defining extension modules +-------------------------- + +A C extension for CPython is a shared library (for example, a ``.so`` file +on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process +(for example, it is compiled with compatible compiler settings), and which +exports an :ref:`initialization function `. + +To be importable by default (that is, by +:py:class:`importlib.machinery.ExtensionFileLoader`), +the shared library must be available on :py:attr:`sys.path`, +and must be named after the module name plus an extension listed in +:py:attr:`importlib.machinery.EXTENSION_SUFFIXES`. + +.. note:: + + Building, packaging and distributing extension modules is best done with + third-party tools, and is out of scope of this document. + One suitable tool is Setuptools, whose documentation can be found at + https://setuptools.pypa.io/en/latest/setuptools.html. + +Normally, the initialization function returns a module definition initialized +using :c:func:`PyModuleDef_Init`. +This allows splitting the creation process into several phases: + +- Before any substantial code is executed, Python can determine which + capabilities the module supports, and it can adjust the environment or + refuse loading an incompatible extension. +- By default, Python itself creates the module object -- that is, it does + the equivalent of :py:meth:`object.__new__` for classes. + It also sets initial attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`. +- Afterwards, the module object is initialized using extension-specific + code -- the equivalent of :py:meth:`~object.__init__` on classes. + +This is called *multi-phase initialization* to distinguish it from the legacy +(but still supported) *single-phase initialization* scheme, +where the initialization function returns a fully constructed module. +See the :ref:`single-phase-initialization section below ` +for details. + +.. versionchanged:: 3.5 + + Added support for multi-phase initialization (:pep:`489`). + + +Multiple module instances +......................... + +By default, extension modules are not singletons. +For example, if the :py:attr:`sys.modules` entry is removed and the module +is re-imported, a new module object is created, and typically populated with +fresh method and type objects. +The old module is subject to normal garbage collection. +This mirrors the behavior of pure-Python modules. + +Additional module instances may be created in +:ref:`sub-interpreters ` +or after Python runtime reinitialization +(:c:func:`Py_Finalize` and :c:func:`Py_Initialize`). +In these cases, sharing Python objects between module instances would likely +cause crashes or undefined behavior. + +To avoid such issues, each instance of an extension module should +be *isolated*: changes to one instance should not implicitly affect the others, +and all state owned by the module, including references to Python objects, +should be specific to a particular module instance. +See :ref:`isolating-extensions-howto` for more details and a practical guide. + +A simpler way to avoid these issues is +:ref:`raising an error on repeated initialization `. + +All modules are expected to support +:ref:`sub-interpreters `, or otherwise explicitly +signal a lack of support. +This is usually achieved by isolation or blocking repeated initialization, +as above. +A module may also be limited to the main interpreter using +the :c:data:`Py_mod_multiple_interpreters` slot. + + +.. _extension-export-hook: + +Initialization function +....................... + +The initialization function defined by an extension module has the +following signature: + +.. c:function:: PyObject* PyInit_modulename(void) + +Its name should be :samp:`PyInit_{}`, with ```` replaced by the +name of the module. + +For modules with ASCII-only names, the function must instead be named +:samp:`PyInit_{}`, with ```` replaced by the name of the module. +When using :ref:`multi-phase-initialization`, non-ASCII module names +are allowed. In this case, the initialization function name is +:samp:`PyInitU_{}`, with ```` encoded using Python's +*punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def initfunc_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyInit' + suffix + +It is recommended to define the initialization function using a helper macro: + +.. c:macro:: PyMODINIT_FUNC + + Declare an extension module initialization function. + This macro: + + * specifies the :c:expr:`PyObject*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. + +For example, a module called ``spam`` would be defined like this:: + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + ... + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); + } + +It is possible to export multiple modules from a single shared library by +defining multiple initialization functions. However, importing them requires +using symbolic links or a custom importer, because by default only the +function corresponding to the filename is found. +See the `Multiple modules in one library `__ +section in :pep:`489` for details. + +The initialization function is typically the only non-\ ``static`` +item defined in the module's C source. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +Normally, the :ref:`initialization function ` +(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with +non-``NULL`` :c:member:`~PyModuleDef.m_slots`. +Before it is returned, the ``PyModuleDef`` instance must be initialized +using the following function: + + +.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required for :ref:`multi-phase-initialization`. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + +.. _single-phase-initialization: + +Legacy single-phase initialization +.................................. + +.. attention:: + Single-phase initialization is a legacy mechanism to initialize extension + modules, with known drawbacks and design flaws. Extension module authors + are encouraged to use multi-phase initialization instead. + +In single-phase initialization, the +:ref:`initialization function ` (``PyInit_modulename``) +should create, populate and return a module object. +This is typically done using :c:func:`PyModule_Create` and functions like +:c:func:`PyModule_AddObjectRef`. + +Single-phase initialization differs from the :ref:`default ` +in the following ways: + +* Single-phase modules are, or rather *contain*, “singletons”. + + When the module is first initialized, Python saves the contents of + the module's ``__dict__`` (that is, typically, the module's functions and + types). + + For subsequent imports, Python does not call the initialization function + again. + Instead, it creates a new module object with a new ``__dict__``, and copies + the saved contents to it. + For example, given a single-phase module ``_testsinglephase`` + [#testsinglephase]_ that defines a function ``sum`` and an exception class + ``error``: + + .. code-block:: python + + >>> import sys + >>> import _testsinglephase as one + >>> del sys.modules['_testsinglephase'] + >>> import _testsinglephase as two + >>> one is two + False + >>> one.__dict__ is two.__dict__ + False + >>> one.sum is two.sum + True + >>> one.error is two.error + True + + The exact behavior should be considered a CPython implementation detail. + +* To work around the fact that ``PyInit_modulename`` does not take a *spec* + argument, some state of the import machinery is saved and applied to the + first suitable module created during the ``PyInit_modulename`` call. + Specifically, when a sub-module is imported, this mechanism prepends the + parent package name to the name of the module. + + A single-phase ``PyInit_modulename`` function should create “its” module + object as soon as possible, before any other module objects can be created. + +* Non-ASCII module names (``PyInitU_modulename``) are not supported. + +* Single-phase modules support module lookup functions like + :c:func:`PyState_FindModule`. + +.. [#testsinglephase] ``_testsinglephase`` is an internal module used \ + in CPython's self-test suite; your installation may or may not \ + include it. diff --git a/Doc/c-api/index.rst b/Doc/c-api/index.rst index ba56b03c6ac8e7..e9df2a304d975b 100644 --- a/Doc/c-api/index.rst +++ b/Doc/c-api/index.rst @@ -17,6 +17,7 @@ document the API functions in detail. veryhigh.rst refcounting.rst exceptions.rst + extension-modules.rst utilities.rst abstract.rst concrete.rst diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 41856922110eee..acce3dc215d157 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -111,33 +111,11 @@ Useful macros ============= Several useful macros are defined in the Python header files. Many are -defined closer to where they are useful (e.g. :c:macro:`Py_RETURN_NONE`). +defined closer to where they are useful (for example, :c:macro:`Py_RETURN_NONE`, +:c:macro:`PyMODINIT_FUNC`). Others of a more general utility are defined here. This is not necessarily a complete listing. -.. c:macro:: PyMODINIT_FUNC - - Declare an extension module ``PyInit`` initialization function. The function - return type is :c:expr:`PyObject*`. The macro declares any special linkage - declarations required by the platform, and for C++ declares the function as - ``extern "C"``. - - The initialization function must be named :samp:`PyInit_{name}`, where - *name* is the name of the module, and should be the only non-\ ``static`` - item defined in the module file. Example:: - - static struct PyModuleDef spam_module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "spam", - ... - }; - - PyMODINIT_FUNC - PyInit_spam(void) - { - return PyModuleDef_Init(&spam_module); - } - .. c:macro:: Py_ABS(x) diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index 710135dca89eda..c8edcecc5b419f 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -127,25 +127,36 @@ Module Objects unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _initializing-modules: +.. _pymoduledef: -Initializing C modules -^^^^^^^^^^^^^^^^^^^^^^ +Module definitions +------------------ -Modules objects are usually created from extension modules (shared libraries -which export an initialization function), or compiled-in modules -(where the initialization function is added using :c:func:`PyImport_AppendInittab`). -See :ref:`building` or :ref:`extending-with-embedding` for details. +The functions in the previous section work on any module object, including +modules imported from Python code. -The initialization function can either pass a module definition instance -to :c:func:`PyModule_Create`, and return the resulting module object, -or request "multi-phase initialization" by returning the definition struct itself. +Modules defined using the C API typically use a *module definition*, +:c:type:`PyModuleDef` -- a statically allocated, constant “description" of +how a module should be created. + +The definition is usually used to define an extension's “main” module object +(see :ref:`extension-modules` for details). +It is also used to +:ref:`create extension modules dynamically `. + +Unlike :c:func:`PyModule_New`, the definition allows management of +*module state* -- a piece of memory that is allocated and cleared together +with the module object. +Unlike the module's Python attributes, Python code cannot replace or delete +data stored in module state. .. c:type:: PyModuleDef The module definition struct, which holds all information needed to create - a module object. There is usually only one statically initialized variable - of this type for each module. + a module object. + This structure must be statically allocated (or be otherwise guaranteed + to be valid while any modules created from it exist). + Usually, there is only one variable of this type for each extension module. .. c:member:: PyModuleDef_Base m_base @@ -170,13 +181,15 @@ or request "multi-phase initialization" by returning the definition struct itsel and freed when the module object is deallocated, after the :c:member:`~PyModuleDef.m_free` function has been called, if present. - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. - Setting it to a non-negative value means that the module can be re-initialized and specifies the additional amount of memory it requires - for its state. Non-negative ``m_size`` is required for multi-phase - initialization. + for its state. + + Setting ``m_size`` to ``-1`` means that the module does not support + sub-interpreters, because it has global state. + Negative ``m_size`` is only allowed when using + :ref:`legacy single-phase initialization ` + or when :ref:`creating modules dynamically `. See :PEP:`3121` for more details. @@ -189,7 +202,7 @@ or request "multi-phase initialization" by returning the definition struct itsel An array of slot definitions for multi-phase initialization, terminated by a ``{0, NULL}`` entry. - When using single-phase initialization, *m_slots* must be ``NULL``. + When using legacy single-phase initialization, *m_slots* must be ``NULL``. .. versionchanged:: 3.5 @@ -249,96 +262,9 @@ or request "multi-phase initialization" by returning the definition struct itsel .. versionchanged:: 3.9 No longer called before the module state is allocated. -Single-phase initialization -........................... - -The module initialization function may create and return the module object -directly. This is referred to as "single-phase initialization", and uses one -of the following two module creation functions: - -.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) - - Create a new module object, given the definition in *def*. This behaves - like :c:func:`PyModule_Create2` with *module_api_version* set to - :c:macro:`PYTHON_API_VERSION`. - - -.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) - - Create a new module object, given the definition in *def*, assuming the - API version *module_api_version*. If that version does not match the version - of the running interpreter, a :exc:`RuntimeWarning` is emitted. - - Return ``NULL`` with an exception set on error. - - .. note:: - - Most uses of this function should be using :c:func:`PyModule_Create` - instead; only use this if you are sure you need it. -Before it is returned from in the initialization function, the resulting module -object is typically populated using functions like :c:func:`PyModule_AddObjectRef`. - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -An alternate way to specify extensions is to request "multi-phase initialization". -Extension modules created this way behave more like Python modules: the -initialization is split between the *creation phase*, when the module object -is created, and the *execution phase*, when it is populated. -The distinction is similar to the :py:meth:`~object.__new__` and -:py:meth:`~object.__init__` methods of classes. - -Unlike modules created using single-phase initialization, these modules are not -singletons. -For example, if the :py:attr:`sys.modules` entry is removed and the module -is re-imported, a new module object is created, and typically populated with -fresh method and type objects. -The old module is subject to normal garbage collection. -This mirrors the behavior of pure-Python modules. - -Additional module instances may be created in -:ref:`sub-interpreters ` -or after after Python runtime reinitialization -(:c:func:`Py_Finalize` and :c:func:`Py_Initialize`). -In these cases, sharing Python objects between module instances would likely -cause crashes or undefined behavior. - -To avoid such issues, each instance of an extension module should -be *isolated*: changes to one instance should not implicitly affect the others, -and all state, including references to Python objects, should be specific to -a particular module instance. -See :ref:`isolating-extensions-howto` for more details and a practical guide. - -A simpler way to avoid these issues is -:ref:`raising an error on repeated initialization `. - -All modules created using multi-phase initialization are expected to support -:ref:`sub-interpreters `, or otherwise explicitly -signal a lack of support. -This is usually achieved by isolation or blocking repeated initialization, -as above. -A module may also be limited to the main interpreter using -the :c:data:`Py_mod_multiple_interpreters` slot. - -To request multi-phase initialization, the initialization function -(PyInit_modulename) returns a :c:type:`PyModuleDef` instance with non-empty -:c:member:`~PyModuleDef.m_slots`. Before it is returned, the ``PyModuleDef`` -instance must be initialized with the following function: - -.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) - - Ensures a module definition is a properly initialized Python object that - correctly reports its type and reference count. - - Returns *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - - .. versionadded:: 3.5 - -The *m_slots* member of the module definition must point to an array of -``PyModuleDef_Slot`` structures: +Module slots +............ .. c:type:: PyModuleDef_Slot @@ -352,8 +278,6 @@ The *m_slots* member of the module definition must point to an array of .. versionadded:: 3.5 -The *m_slots* array must be terminated by a slot with id 0. - The available slot types are: .. c:macro:: Py_mod_create @@ -464,21 +388,48 @@ The available slot types are: .. versionadded:: 3.13 -See :PEP:`489` for more details on multi-phase initialization. -Low-level module creation functions -................................... +.. _moduledef-dynamic: + +Creating extension modules dynamically +-------------------------------------- + +The following functions may be used to create a module outside of an +extension's :ref:`initialization function `. +They are also used in +:ref:`single-phase initialization `. + +.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) + + Create a new module object, given the definition in *def*. + This is a macro that calls :c:func:`PyModule_Create2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. + +.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) + + Create a new module object, given the definition in *def*, assuming the + API version *module_api_version*. If that version does not match the version + of the running interpreter, a :exc:`RuntimeWarning` is emitted. + + Return ``NULL`` with an exception set on error. + + This function does not support slots. + The :c:member:`~PyModuleDef.m_slots` member of *def* must be ``NULL``. + -The following functions are called under the hood when using multi-phase -initialization. They can be used directly, for example when creating module -objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and -``PyModule_ExecDef`` must be called to fully initialize a module. + .. note:: + + Most uses of this function should be using :c:func:`PyModule_Create` + instead; only use this if you are sure you need it. .. c:function:: PyObject * PyModule_FromDefAndSpec(PyModuleDef *def, PyObject *spec) - Create a new module object, given the definition in *def* and the - ModuleSpec *spec*. This behaves like :c:func:`PyModule_FromDefAndSpec2` - with *module_api_version* set to :c:macro:`PYTHON_API_VERSION`. + This macro calls :c:func:`PyModule_FromDefAndSpec2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. .. versionadded:: 3.5 @@ -491,6 +442,10 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and Return ``NULL`` with an exception set on error. + Note that this does not process execution slots (:c:data:`Py_mod_exec`). + Both ``PyModule_FromDefAndSpec`` and ``PyModule_ExecDef`` must be called + to fully initialize a module. + .. note:: Most uses of this function should be using :c:func:`PyModule_FromDefAndSpec` @@ -504,35 +459,29 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and .. versionadded:: 3.5 -.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) +.. c:macro:: PYTHON_API_VERSION - Set the docstring for *module* to *docstring*. - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + The C API version. Defined for backwards compatibility. - .. versionadded:: 3.5 + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. -.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) +.. c:macro:: PYTHON_ABI_VERSION - Add the functions from the ``NULL`` terminated *functions* array to *module*. - Refer to the :c:type:`PyMethodDef` documentation for details on individual - entries (due to the lack of a shared module namespace, module level - "functions" implemented in C typically receive the module as their first - parameter, making them similar to instance methods on Python classes). - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + Defined as ``3`` for backwards compatibility. + + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. - .. versionadded:: 3.5 Support functions -................. +----------------- -The module initialization function (if using single phase initialization) or -a function called from a module execution slot (if using multi-phase -initialization), can use the following functions to help initialize the module -state: +The following functions are provided to help initialize a module +state. +They are intended for a module's execution slots (:c:data:`Py_mod_exec`), +the initialization function for legacy :ref:`single-phase initialization `, +or code that creates modules dynamically. .. c:function:: int PyModule_AddObjectRef(PyObject *module, const char *name, PyObject *value) @@ -681,12 +630,39 @@ state: .. versionadded:: 3.9 +.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) + + Add the functions from the ``NULL`` terminated *functions* array to *module*. + Refer to the :c:type:`PyMethodDef` documentation for details on individual + entries (due to the lack of a shared module namespace, module level + "functions" implemented in C typically receive the module as their first + parameter, making them similar to instance methods on Python classes). + + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + Some module authors may prefer defining functions in multiple + :c:type:`PyMethodDef` arrays; in that case they should call this function + directly. + + .. versionadded:: 3.5 + +.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) + + Set the docstring for *module* to *docstring*. + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + + .. versionadded:: 3.5 + .. c:function:: int PyUnstable_Module_SetGIL(PyObject *module, void *gil) Indicate that *module* does or does not support running without the global interpreter lock (GIL), using one of the values from :c:macro:`Py_mod_gil`. It must be called during *module*'s initialization - function. If this function is not called during module initialization, the + function when using :ref:`single-phase-initialization`. + If this function is not called during module initialization, the import machinery assumes the module does not support running without the GIL. This function is only available in Python builds configured with :option:`--disable-gil`. @@ -695,10 +671,11 @@ state: .. versionadded:: 3.13 -Module lookup -^^^^^^^^^^^^^ +Module lookup (single-phase initialization) +........................................... -Single-phase initialization creates singleton modules that can be looked up +The legacy :ref:`single-phase initialization ` +initialization scheme creates singleton modules that can be looked up in the context of the current interpreter. This allows the module object to be retrieved later with only a reference to the module definition. @@ -719,7 +696,8 @@ since multiple such modules can be created from a single definition. Only effective on modules created using single-phase initialization. - Python calls ``PyState_AddModule`` automatically after importing a module, + Python calls ``PyState_AddModule`` automatically after importing a module + that uses :ref:`single-phase initialization `, so it is unnecessary (but harmless) to call it from module initialization code. An explicit call is needed only if the module's own init code subsequently calls ``PyState_FindModule``. @@ -727,6 +705,9 @@ since multiple such modules can be created from a single definition. mechanisms (either by calling it directly, or by referring to its implementation for details of the required state updates). + If a module was attached previously using the same *def*, it is replaced + by the new *module*. + The caller must have an :term:`attached thread state`. Return ``-1`` with an exception set on error, ``0`` on success. diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 59b31ccf7bc471..f5f02f0a79c93d 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1489,9 +1489,6 @@ PyModule_SetDocString:int::: PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: -PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:: - PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: diff --git a/Doc/extending/building.rst b/Doc/extending/building.rst index a58eb40d431c59..098dde39ea597e 100644 --- a/Doc/extending/building.rst +++ b/Doc/extending/building.rst @@ -6,41 +6,10 @@ Building C and C++ Extensions ***************************** -A C extension for CPython is a shared library (e.g. a ``.so`` file on Linux, -``.pyd`` on Windows), which exports an *initialization function*. +A C extension for CPython is a shared library (for example, a ``.so`` file on +Linux, ``.pyd`` on Windows), which exports an *initialization function*. -To be importable, the shared library must be available on :envvar:`PYTHONPATH`, -and must be named after the module name, with an appropriate extension. -When using setuptools, the correct filename is generated automatically. - -The initialization function has the signature: - -.. c:function:: PyObject* PyInit_modulename(void) - -It returns either a fully initialized module, or a :c:type:`PyModuleDef` -instance. See :ref:`initializing-modules` for details. - -.. highlight:: python - -For modules with ASCII-only names, the function must be named -:samp:`PyInit_{}`, with ```` replaced by the name of the module. -When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -:samp:`PyInitU_{}`, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python:: - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the *"Multiple modules in one library"* section in :pep:`489` for details. +See :ref:`extension-modules` for details. .. highlight:: c @@ -51,7 +20,11 @@ See the *"Multiple modules in one library"* section in :pep:`489` for details. Building C and C++ Extensions with setuptools ============================================= -Python 3.12 and newer no longer come with distutils. Please refer to the -``setuptools`` documentation at -https://setuptools.readthedocs.io/en/latest/setuptools.html -to learn more about how build and distribute C/C++ extensions with setuptools. + +Building, packaging and distributing extension modules is best done with +third-party tools, and is out of scope of this document. +One suitable tool is Setuptools, whose documentation can be found at +https://setuptools.pypa.io/en/latest/setuptools.html. + +The :mod:`distutils` module, which was included in the standard library +until Python 3.12, is now maintained as part of Setuptools. From afc5ab6cce9d7095b99c1410a6762bc4a96504dd Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 13 Jun 2025 16:04:43 +0100 Subject: [PATCH 555/989] gh-135455: Fix version and architecture detection in PC/layout script. (GH-135461) --- PC/layout/main.py | 28 +++++++++++++-------- PC/layout/support/arch.py | 34 +++++++++++++++++++++++++ PC/layout/support/constants.py | 45 ++++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 12 deletions(-) create mode 100644 PC/layout/support/arch.py diff --git a/PC/layout/main.py b/PC/layout/main.py index 7324a135133b66..8543e7c56e1c41 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -247,9 +247,15 @@ def in_build(f, dest="", new_name=None, no_lib=False): if ns.include_freethreaded: yield from in_build("venvlaunchert.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlaunchert.exe", "Lib/venv/scripts/nt/") - else: + elif (VER_MAJOR, VER_MINOR) > (3, 12): yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/") + else: + # Older versions of venv expected the scripts to be named 'python' + # and they were renamed at this stage. We need to replicate that + # when packaging older versions. + yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/", "python") + yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/", "pythonw") if ns.include_tools: @@ -652,15 +658,6 @@ def main(): ns.doc_build = (Path.cwd() / ns.doc_build).resolve() if ns.include_cat and not ns.include_cat.is_absolute(): ns.include_cat = (Path.cwd() / ns.include_cat).resolve() - if not ns.arch: - # TODO: Calculate arch from files in ns.build instead - if sys.winver.endswith("-arm64"): - ns.arch = "arm64" - elif sys.winver.endswith("-32"): - ns.arch = "win32" - else: - ns.arch = "amd64" - if ns.zip and not ns.zip.is_absolute(): ns.zip = (Path.cwd() / ns.zip).resolve() if ns.catalog and not ns.catalog.is_absolute(): @@ -668,6 +665,17 @@ def main(): configure_logger(ns) + if not ns.arch: + from .support.arch import calculate_from_build_dir + ns.arch = calculate_from_build_dir(ns.build) + + expect = f"{VER_MAJOR}.{VER_MINOR}.{VER_MICRO}{VER_SUFFIX}" + actual = check_patchlevel_version(ns.source) + if actual and actual != expect: + log_error(f"Inferred version {expect} does not match {actual} from patchlevel.h. " + "You should set %PYTHONINCLUDE% or %PYTHON_HEXVERSION% before launching.") + return 5 + log_info( """OPTIONS Source: {ns.source} diff --git a/PC/layout/support/arch.py b/PC/layout/support/arch.py new file mode 100644 index 00000000000000..daf4efbc7ab674 --- /dev/null +++ b/PC/layout/support/arch.py @@ -0,0 +1,34 @@ +from struct import unpack +from .constants import * +from .logging import * + +def calculate_from_build_dir(root): + candidates = [ + root / PYTHON_DLL_NAME, + root / FREETHREADED_PYTHON_DLL_NAME, + *root.glob("*.dll"), + *root.glob("*.pyd"), + # Check EXE last because it's easier to have cross-platform EXE + *root.glob("*.exe"), + ] + + ARCHS = { + b"PE\0\0\x4c\x01": "win32", + b"PE\0\0\x64\x86": "amd64", + b"PE\0\0\x64\xAA": "arm64" + } + + first_exc = None + for pe in candidates: + try: + # Read the PE header to grab the machine type + with open(pe, "rb") as f: + f.seek(0x3C) + offset = int.from_bytes(f.read(4), "little") + f.seek(offset) + arch = ARCHS[f.read(6)] + except (FileNotFoundError, PermissionError, LookupError) as ex: + log_debug("Failed to open {}: {}", pe, ex) + continue + log_info("Inferred architecture {} from {}", arch, pe) + return arch diff --git a/PC/layout/support/constants.py b/PC/layout/support/constants.py index ae22aa16ebfa5d..6b8c915e519743 100644 --- a/PC/layout/support/constants.py +++ b/PC/layout/support/constants.py @@ -6,6 +6,8 @@ __version__ = "3.8" import os +import pathlib +import re import struct import sys @@ -13,9 +15,15 @@ def _unpack_hexversion(): try: hexversion = int(os.getenv("PYTHON_HEXVERSION"), 16) + return struct.pack(">i", hexversion) except (TypeError, ValueError): - hexversion = sys.hexversion - return struct.pack(">i", hexversion) + pass + if os.getenv("PYTHONINCLUDE"): + try: + return _read_patchlevel_version(pathlib.Path(os.getenv("PYTHONINCLUDE"))) + except OSError: + pass + return struct.pack(">i", sys.hexversion) def _get_suffix(field4): @@ -26,6 +34,39 @@ def _get_suffix(field4): return "" +def _read_patchlevel_version(sources): + if not sources.match("Include"): + sources /= "Include" + values = {} + with open(sources / "patchlevel.h", "r", encoding="utf-8") as f: + for line in f: + m = re.match(r'#\s*define\s+(PY_\S+?)\s+(\S+)', line.strip(), re.I) + if m and m.group(2): + v = m.group(2) + if v.startswith('"'): + v = v[1:-1] + else: + v = values.get(v, v) + if isinstance(v, str): + try: + v = int(v, 16 if v.startswith("0x") else 10) + except ValueError: + pass + values[m.group(1)] = v + return ( + values["PY_MAJOR_VERSION"], + values["PY_MINOR_VERSION"], + values["PY_MICRO_VERSION"], + values["PY_RELEASE_LEVEL"] << 4 | values["PY_RELEASE_SERIAL"], + ) + + +def check_patchlevel_version(sources): + got = _read_patchlevel_version(sources) + if got != (VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4): + return f"{got[0]}.{got[1]}.{got[2]}{_get_suffix(got[3])}" + + VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4 = _unpack_hexversion() VER_SUFFIX = _get_suffix(VER_FIELD4) VER_FIELD3 = VER_MICRO << 8 | VER_FIELD4 From 14c1d093d52d8de32c486808c1e1e85e0f61c11c Mon Sep 17 00:00:00 2001 From: Blaise Pabon Date: Fri, 13 Jun 2025 15:44:21 +0000 Subject: [PATCH 556/989] gh-106318: Add example for `str.expandtabs()` (#134525) --- Doc/library/stdtypes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 0954e3409a2dcc..394c302fd354b9 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1891,12 +1891,15 @@ expression support in the :mod:`re` module). (``\n``) or return (``\r``), it is copied and the current column is reset to zero. Any other character is copied unchanged and the current column is incremented by one regardless of how the character is represented when - printed. + printed. For example:: >>> '01\t012\t0123\t01234'.expandtabs() '01 012 0123 01234' >>> '01\t012\t0123\t01234'.expandtabs(4) '01 012 0123 01234' + >>> print('01\t012\n0123\t01234'.expandtabs(4)) + 01 012 + 0123 01234 .. method:: str.find(sub[, start[, end]]) From 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 13 Jun 2025 19:57:48 +0300 Subject: [PATCH 557/989] gh-135462: Fix quadratic complexity in processing special input in HTMLParser (GH-135464) End-of-file errors are now handled according to the HTML5 specs -- comments and declarations are automatically closed, tags are ignored. --- Lib/html/parser.py | 41 +++++--- Lib/test/test_htmlparser.py | 97 +++++++++++++++---- ...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 + 3 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 1e30956fe24f83..ba416e7fa6e3fe 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -27,6 +27,7 @@ attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') +endtagopen = re.compile('') commentclose = re.compile(r'--\s*>') # Note: @@ -195,7 +196,7 @@ def goahead(self, end): k = self.parse_pi(i) elif startswith("', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(rawdata[i:k])) + if starttagopen.match(rawdata, i): # < + letter + pass + elif startswith("'), - ('comment', '/img'), - ('endtag', 'html<')]) + ('data', '\n')]) def test_starttag_junk_chars(self): + self._run_check("<", [('data', '<')]) + self._run_check("<>", [('data', '<>')]) + self._run_check("< >", [('data', '< >')]) + self._run_check("< ", [('data', '< ')]) self._run_check("", []) + self._run_check("<$>", [('data', '<$>')]) self._run_check("", [('comment', '$')]) self._run_check("", [('endtag', 'a')]) + self._run_check("", [('starttag', 'a", [('endtag', 'a'", [('data', "'", []) + self._run_check("", [('starttag', 'a$b', [])]) self._run_check("", [('startendtag', 'a$b', [])]) self._run_check("", [('starttag', 'a$b', [])]) self._run_check("", [('startendtag', 'a$b', [])]) + self._run_check("", [('endtag', 'a$b')]) def test_slashes_in_starttag(self): self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) @@ -576,21 +583,50 @@ def test_EOF_in_charref(self): for html, expected in data: self._run_check(html, expected) - def test_EOF_in_comments_or_decls(self): + def test_eof_in_comments(self): data = [ - ('', [('comment', '-!>')]), + ('' '' '' @@ -604,6 +640,7 @@ def test_bogus_comments(self): '' # required '[' after CDATA ) expected = [ + ('comment', 'ELEMENT br EMPTY'), ('comment', ' not really a comment '), ('comment', ' not a comment either --'), ('comment', ' -- close enough --'), @@ -684,6 +721,26 @@ def test_convert_charrefs_dropped_text(self): ('endtag', 'a'), ('data', ' bar & baz')] ) + @support.requires_resource('cpu') + def test_eof_no_quadratic_complexity(self): + # Each of these examples used to take about an hour. + # Now they take a fraction of a second. + def check(source): + parser = html.parser.HTMLParser() + parser.feed(source) + parser.close() + n = 120_000 + check(" Date: Fri, 13 Jun 2025 16:45:21 -0600 Subject: [PATCH 558/989] gh-132775: Clean Up Cross-Interpreter Error Handling (gh-135369) In this refactor we: * move some code around * make a couple of typedefs opaque * decouple errors from session state * improve tracebacks for propagated exceptions This change helps simplify several upcoming changes. --- Include/internal/pycore_crossinterp.h | 37 +- Modules/_interpretersmodule.c | 90 ++-- Python/crossinterp.c | 673 ++++++++++++++++---------- Python/crossinterp_data_lookup.h | 27 ++ Python/crossinterp_exceptions.h | 7 - 5 files changed, 523 insertions(+), 311 deletions(-) diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 713ddc66ba7382..81faffac194171 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -303,10 +303,10 @@ typedef struct _excinfo { const char *errdisplay; } _PyXI_excinfo; -PyAPI_FUNC(int) _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc); +PyAPI_FUNC(_PyXI_excinfo *) _PyXI_NewExcInfo(PyObject *exc); +PyAPI_FUNC(void) _PyXI_FreeExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_FormatExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_ExcInfoAsObject(_PyXI_excinfo *info); -PyAPI_FUNC(void) _PyXI_ClearExcInfo(_PyXI_excinfo *info); typedef enum error_code { @@ -322,19 +322,20 @@ typedef enum error_code { _PyXI_ERR_NOT_SHAREABLE = -9, } _PyXI_errcode; +typedef struct xi_failure _PyXI_failure; -typedef struct _sharedexception { - // The originating interpreter. - PyInterpreterState *interp; - // The kind of error to propagate. - _PyXI_errcode code; - // The exception information to propagate, if applicable. - // This is populated only for some error codes, - // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. - _PyXI_excinfo uncaught; -} _PyXI_error; +PyAPI_FUNC(_PyXI_failure *) _PyXI_NewFailure(void); +PyAPI_FUNC(void) _PyXI_FreeFailure(_PyXI_failure *); +PyAPI_FUNC(_PyXI_errcode) _PyXI_GetFailureCode(_PyXI_failure *); +PyAPI_FUNC(int) _PyXI_InitFailure(_PyXI_failure *, _PyXI_errcode, PyObject *); +PyAPI_FUNC(void) _PyXI_InitFailureUTF8( + _PyXI_failure *, + _PyXI_errcode, + const char *); -PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); +PyAPI_FUNC(int) _PyXI_UnwrapNotShareableError( + PyThreadState *, + _PyXI_failure *); // A cross-interpreter session involves entering an interpreter @@ -366,19 +367,21 @@ PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session_result *); PyAPI_FUNC(int) _PyXI_Exit( _PyXI_session *, - _PyXI_errcode, + _PyXI_failure *, _PyXI_session_result *); PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace( _PyXI_session *, - _PyXI_errcode *); + _PyXI_failure *); PyAPI_FUNC(int) _PyXI_Preserve( _PyXI_session *, const char *, PyObject *, - _PyXI_errcode *); -PyAPI_FUNC(PyObject *) _PyXI_GetPreserved(_PyXI_session_result *, const char *); + _PyXI_failure *); +PyAPI_FUNC(PyObject *) _PyXI_GetPreserved( + _PyXI_session_result *, + const char *); /*************/ diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 037e9544543c4d..fdfb3e6dd3482d 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -80,21 +80,11 @@ is_notshareable_raised(PyThreadState *tstate) } static void -unwrap_not_shareable(PyThreadState *tstate) +unwrap_not_shareable(PyThreadState *tstate, _PyXI_failure *failure) { - if (!is_notshareable_raised(tstate)) { - return; - } - PyObject *exc = _PyErr_GetRaisedException(tstate); - PyObject *cause = PyException_GetCause(exc); - if (cause != NULL) { - Py_DECREF(exc); - exc = cause; + if (_PyXI_UnwrapNotShareableError(tstate, failure) < 0) { + _PyErr_Clear(tstate); } - else { - assert(PyException_GetContext(exc) == NULL); - } - _PyErr_SetRaisedException(tstate, exc); } @@ -532,13 +522,30 @@ _interp_call_pack(PyThreadState *tstate, struct interp_call *call, return 0; } +static void +wrap_notshareable(PyThreadState *tstate, const char *label) +{ + if (!is_notshareable_raised(tstate)) { + return; + } + assert(label != NULL && strlen(label) > 0); + PyObject *cause = _PyErr_GetRaisedException(tstate); + _PyXIData_FormatNotShareableError(tstate, "%s not shareable", label); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyException_SetCause(exc, cause); + _PyErr_SetRaisedException(tstate, exc); +} + static int _interp_call_unpack(struct interp_call *call, PyObject **p_func, PyObject **p_args, PyObject **p_kwargs) { + PyThreadState *tstate = PyThreadState_Get(); + // Unpack the func. PyObject *func = _PyXIData_NewObject(call->func); if (func == NULL) { + wrap_notshareable(tstate, "func"); return -1; } // Unpack the args. @@ -553,6 +560,7 @@ _interp_call_unpack(struct interp_call *call, else { args = _PyXIData_NewObject(call->args); if (args == NULL) { + wrap_notshareable(tstate, "args"); Py_DECREF(func); return -1; } @@ -563,6 +571,7 @@ _interp_call_unpack(struct interp_call *call, if (call->kwargs != NULL) { kwargs = _PyXIData_NewObject(call->kwargs); if (kwargs == NULL) { + wrap_notshareable(tstate, "kwargs"); Py_DECREF(func); Py_DECREF(args); return -1; @@ -577,7 +586,7 @@ _interp_call_unpack(struct interp_call *call, static int _make_call(struct interp_call *call, - PyObject **p_result, _PyXI_errcode *p_errcode) + PyObject **p_result, _PyXI_failure *failure) { assert(call != NULL && call->func != NULL); PyThreadState *tstate = _PyThreadState_GET(); @@ -588,12 +597,10 @@ _make_call(struct interp_call *call, assert(func == NULL); assert(args == NULL); assert(kwargs == NULL); - *p_errcode = is_notshareable_raised(tstate) - ? _PyXI_ERR_NOT_SHAREABLE - : _PyXI_ERR_OTHER; + _PyXI_InitFailure(failure, _PyXI_ERR_OTHER, NULL); + unwrap_not_shareable(tstate, failure); return -1; } - *p_errcode = _PyXI_ERR_NO_ERROR; // Make the call. PyObject *resobj = PyObject_Call(func, args, kwargs); @@ -608,17 +615,17 @@ _make_call(struct interp_call *call, } static int -_run_script(_PyXIData_t *script, PyObject *ns, _PyXI_errcode *p_errcode) +_run_script(_PyXIData_t *script, PyObject *ns, _PyXI_failure *failure) { PyObject *code = _PyXIData_NewObject(script); if (code == NULL) { - *p_errcode = _PyXI_ERR_NOT_SHAREABLE; + _PyXI_InitFailure(failure, _PyXI_ERR_NOT_SHAREABLE, NULL); return -1; } PyObject *result = PyEval_EvalCode(code, ns, ns); Py_DECREF(code); if (result == NULL) { - *p_errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + _PyXI_InitFailure(failure, _PyXI_ERR_UNCAUGHT_EXCEPTION, NULL); return -1; } assert(result == Py_None); @@ -644,8 +651,14 @@ _run_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, PyObject *shareables, struct run_result *runres) { assert(!_PyErr_Occurred(tstate)); + int res = -1; + _PyXI_failure *failure = _PyXI_NewFailure(); + if (failure == NULL) { + return -1; + } _PyXI_session *session = _PyXI_NewSession(); if (session == NULL) { + _PyXI_FreeFailure(failure); return -1; } _PyXI_session_result result = {0}; @@ -655,43 +668,44 @@ _run_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, // If an error occured at this step, it means that interp // was not prepared and switched. _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); assert(result.excinfo == NULL); return -1; } // Run in the interpreter. - int res = -1; - _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; if (script != NULL) { assert(call == NULL); - PyObject *mainns = _PyXI_GetMainNamespace(session, &errcode); + PyObject *mainns = _PyXI_GetMainNamespace(session, failure); if (mainns == NULL) { goto finally; } - res = _run_script(script, mainns, &errcode); + res = _run_script(script, mainns, failure); } else { assert(call != NULL); PyObject *resobj; - res = _make_call(call, &resobj, &errcode); + res = _make_call(call, &resobj, failure); if (res == 0) { - res = _PyXI_Preserve(session, "resobj", resobj, &errcode); + res = _PyXI_Preserve(session, "resobj", resobj, failure); Py_DECREF(resobj); if (res < 0) { goto finally; } } } - int exitres; finally: // Clean up and switch back. - exitres = _PyXI_Exit(session, errcode, &result); + (void)res; + int exitres = _PyXI_Exit(session, failure, &result); assert(res == 0 || exitres != 0); _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); res = exitres; if (_PyErr_Occurred(tstate)) { + // It's a directly propagated exception. assert(res < 0); } else if (res < 0) { @@ -1064,7 +1078,7 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) // Clean up and switch back. assert(!PyErr_Occurred()); - int res = _PyXI_Exit(session, _PyXI_ERR_NO_ERROR, NULL); + int res = _PyXI_Exit(session, NULL, NULL); _PyXI_FreeSession(session); assert(res == 0); if (res < 0) { @@ -1124,7 +1138,7 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) // global variables. They will be resolved against __main__. _PyXIData_t xidata = {0}; if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { - unwrap_not_shareable(tstate); + unwrap_not_shareable(tstate, NULL); return NULL; } @@ -1188,7 +1202,7 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) _PyXIData_t xidata = {0}; if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { - unwrap_not_shareable(tstate); + unwrap_not_shareable(tstate, NULL); return NULL; } @@ -1251,7 +1265,7 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) _PyXIData_t xidata = {0}; if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { - unwrap_not_shareable(tstate); + unwrap_not_shareable(tstate, NULL); return NULL; } @@ -1542,16 +1556,16 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } PyObject *captured = NULL; - _PyXI_excinfo info = {0}; - if (_PyXI_InitExcInfo(&info, exc) < 0) { + _PyXI_excinfo *info = _PyXI_NewExcInfo(exc); + if (info == NULL) { goto finally; } - captured = _PyXI_ExcInfoAsObject(&info); + captured = _PyXI_ExcInfoAsObject(info); if (captured == NULL) { goto finally; } - PyObject *formatted = _PyXI_FormatExcInfo(&info); + PyObject *formatted = _PyXI_FormatExcInfo(info); if (formatted == NULL) { Py_CLEAR(captured); goto finally; @@ -1564,7 +1578,7 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } finally: - _PyXI_ClearExcInfo(&info); + _PyXI_FreeExcInfo(info); if (exc != exc_arg) { if (PyErr_Occurred()) { PyErr_SetRaisedException(exc); diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 5e73ab28f2b663..39c7ea698904bd 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -1324,6 +1324,12 @@ _excinfo_normalize_type(struct _excinfo_type *info, *p_module = module; } +static int +excinfo_is_set(_PyXI_excinfo *info) +{ + return info->type.name != NULL || info->msg != NULL; +} + static void _PyXI_excinfo_clear(_PyXI_excinfo *info) { @@ -1485,6 +1491,11 @@ _PyXI_excinfo_Apply(_PyXI_excinfo *info, PyObject *exctype) if (tbexc == NULL) { PyErr_Clear(); } + else { + PyErr_SetObject(exctype, tbexc); + Py_DECREF(tbexc); + return; + } } PyObject *formatted = _PyXI_excinfo_format(info); @@ -1630,13 +1641,17 @@ _PyXI_excinfo_AsObject(_PyXI_excinfo *info) } -int -_PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) +_PyXI_excinfo * +_PyXI_NewExcInfo(PyObject *exc) { assert(!PyErr_Occurred()); if (exc == NULL || exc == Py_None) { PyErr_SetString(PyExc_ValueError, "missing exc"); - return -1; + return NULL; + } + _PyXI_excinfo *info = PyMem_RawCalloc(1, sizeof(_PyXI_excinfo)); + if (info == NULL) { + return NULL; } const char *failure; if (PyExceptionInstance_Check(exc) || PyExceptionClass_Check(exc)) { @@ -1646,10 +1661,18 @@ _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) failure = _PyXI_excinfo_InitFromObject(info, exc); } if (failure != NULL) { - PyErr_SetString(PyExc_Exception, failure); - return -1; + PyMem_RawFree(info); + set_exc_with_cause(PyExc_Exception, failure); + return NULL; } - return 0; + return info; +} + +void +_PyXI_FreeExcInfo(_PyXI_excinfo *info) +{ + _PyXI_excinfo_clear(info); + PyMem_RawFree(info); } PyObject * @@ -1664,12 +1687,6 @@ _PyXI_ExcInfoAsObject(_PyXI_excinfo *info) return _PyXI_excinfo_AsObject(info); } -void -_PyXI_ClearExcInfo(_PyXI_excinfo *info) -{ - _PyXI_excinfo_clear(info); -} - /***************************/ /* short-term data sharing */ @@ -1727,70 +1744,267 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) return -1; } +/* basic failure info */ + +struct xi_failure { + // The kind of error to propagate. + _PyXI_errcode code; + // The propagated message. + const char *msg; + int msg_owned; +}; // _PyXI_failure + +#define XI_FAILURE_INIT (_PyXI_failure){ .code = _PyXI_ERR_NO_ERROR } + +static void +clear_xi_failure(_PyXI_failure *failure) +{ + if (failure->msg != NULL && failure->msg_owned) { + PyMem_RawFree((void*)failure->msg); + } + *failure = XI_FAILURE_INIT; +} + +static void +copy_xi_failure(_PyXI_failure *dest, _PyXI_failure *src) +{ + *dest = *src; + dest->msg_owned = 0; +} + +_PyXI_failure * +_PyXI_NewFailure(void) +{ + _PyXI_failure *failure = PyMem_RawMalloc(sizeof(_PyXI_failure)); + if (failure == NULL) { + PyErr_NoMemory(); + return NULL; + } + *failure = XI_FAILURE_INIT; + return failure; +} + +void +_PyXI_FreeFailure(_PyXI_failure *failure) +{ + clear_xi_failure(failure); + PyMem_RawFree(failure); +} + +_PyXI_errcode +_PyXI_GetFailureCode(_PyXI_failure *failure) +{ + if (failure == NULL) { + return _PyXI_ERR_NO_ERROR; + } + return failure->code; +} + +void +_PyXI_InitFailureUTF8(_PyXI_failure *failure, + _PyXI_errcode code, const char *msg) +{ + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 0, + }; +} + +int +_PyXI_InitFailure(_PyXI_failure *failure, _PyXI_errcode code, PyObject *obj) +{ + PyObject *msgobj = PyObject_Str(obj); + if (msgobj == NULL) { + return -1; + } + // This will leak if not paired with clear_xi_failure(). + // That happens automatically in _capture_current_exception(). + const char *msg = _copy_string_obj_raw(msgobj, NULL); + Py_DECREF(msgobj); + if (PyErr_Occurred()) { + return -1; + } + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 1, + }; + return 0; +} + /* shared exceptions */ -static const char * -_PyXI_InitError(_PyXI_error *error, PyObject *excobj, _PyXI_errcode code) +typedef struct { + // The originating interpreter. + PyInterpreterState *interp; + // The error to propagate, if different from the uncaught exception. + _PyXI_failure *override; + _PyXI_failure _override; + // The exception information to propagate, if applicable. + // This is populated only for some error codes, + // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. + _PyXI_excinfo uncaught; +} _PyXI_error; + +static void +xi_error_clear(_PyXI_error *err) { - if (error->interp == NULL) { - error->interp = PyInterpreterState_Get(); + err->interp = NULL; + if (err->override != NULL) { + clear_xi_failure(err->override); } + _PyXI_excinfo_clear(&err->uncaught); +} - const char *failure = NULL; - if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // There is an unhandled exception we need to propagate. - failure = _PyXI_excinfo_InitFromException(&error->uncaught, excobj); - if (failure != NULL) { - // We failed to initialize error->uncaught. - // XXX Print the excobj/traceback? Emit a warning? - // XXX Print the current exception/traceback? - if (PyErr_ExceptionMatches(PyExc_MemoryError)) { - error->code = _PyXI_ERR_NO_MEMORY; - } - else { - error->code = _PyXI_ERR_OTHER; - } - PyErr_Clear(); +static int +xi_error_is_set(_PyXI_error *error) +{ + if (error->override != NULL) { + assert(error->override->code != _PyXI_ERR_NO_ERROR); + assert(error->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION + || excinfo_is_set(&error->uncaught)); + return 1; + } + return excinfo_is_set(&error->uncaught); +} + +static int +xi_error_has_override(_PyXI_error *err) +{ + if (err->override == NULL) { + return 0; + } + return (err->override->code != _PyXI_ERR_NO_ERROR + && err->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); +} + +static PyObject * +xi_error_resolve_current_exc(PyThreadState *tstate, + _PyXI_failure *override) +{ + assert(override == NULL || override->code != _PyXI_ERR_NO_ERROR); + + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (exc == NULL) { + assert(override == NULL + || override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + } + else if (override == NULL) { + // This is equivalent to _PyXI_ERR_UNCAUGHT_EXCEPTION. + } + else if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + // We want to actually capture the current exception. + } + else if (exc != NULL) { + // It might make sense to do similarly for other codes. + if (override->code == _PyXI_ERR_ALREADY_RUNNING) { + // We don't need the exception info. + Py_CLEAR(exc); + } + // ...else we want to actually capture the current exception. + } + return exc; +} + +static void +xi_error_set_override(PyThreadState *tstate, _PyXI_error *err, + _PyXI_failure *override) +{ + assert(err->override == NULL); + assert(override != NULL); + assert(override->code != _PyXI_ERR_NO_ERROR); + // Use xi_error_set_exc() instead of setting _PyXI_ERR_UNCAUGHT_EXCEPTION.. + assert(override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + err->override = &err->_override; + // The caller still owns override->msg. + copy_xi_failure(&err->_override, override); + err->interp = tstate->interp; +} + +static void +xi_error_set_override_code(PyThreadState *tstate, _PyXI_error *err, + _PyXI_errcode code) +{ + _PyXI_failure override = XI_FAILURE_INIT; + override.code = code; + xi_error_set_override(tstate, err, &override); +} + +static const char * +xi_error_set_exc(PyThreadState *tstate, _PyXI_error *err, PyObject *exc) +{ + assert(!_PyErr_Occurred(tstate)); + assert(!xi_error_is_set(err)); + assert(err->override == NULL); + assert(err->interp == NULL); + assert(exc != NULL); + const char *failure = + _PyXI_excinfo_InitFromException(&err->uncaught, exc); + if (failure != NULL) { + // We failed to initialize err->uncaught. + // XXX Print the excobj/traceback? Emit a warning? + // XXX Print the current exception/traceback? + if (_PyErr_ExceptionMatches(tstate, PyExc_MemoryError)) { + xi_error_set_override_code(tstate, err, _PyXI_ERR_NO_MEMORY); } else { - error->code = code; + xi_error_set_override_code(tstate, err, _PyXI_ERR_OTHER); } - assert(error->code != _PyXI_ERR_NO_ERROR); - } - else { - // There is an error code we need to propagate. - assert(excobj == NULL); - assert(code != _PyXI_ERR_NO_ERROR); - error->code = code; - _PyXI_excinfo_clear(&error->uncaught); + PyErr_Clear(); } return failure; } -PyObject * -_PyXI_ApplyError(_PyXI_error *error) +static PyObject * +_PyXI_ApplyError(_PyXI_error *error, const char *failure) { PyThreadState *tstate = PyThreadState_Get(); - if (error->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + + if (failure != NULL) { + xi_error_clear(error); + return NULL; + } + + _PyXI_errcode code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + if (error->override != NULL) { + code = error->override->code; + assert(code != _PyXI_ERR_NO_ERROR); + } + + if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { // We will raise an exception that proxies the propagated exception. return _PyXI_excinfo_AsObject(&error->uncaught); } - else if (error->code == _PyXI_ERR_NOT_SHAREABLE) { + else if (code == _PyXI_ERR_NOT_SHAREABLE) { // Propagate the exception directly. assert(!_PyErr_Occurred(tstate)); - _set_xid_lookup_failure(tstate, NULL, error->uncaught.msg, NULL); + PyObject *cause = NULL; + if (excinfo_is_set(&error->uncaught)) { + // Maybe instead set a PyExc_ExceptionSnapshot as __cause__? + // That type doesn't exist currently + // but would look like interpreters.ExecutionFailed. + _PyXI_excinfo_Apply(&error->uncaught, PyExc_Exception); + cause = _PyErr_GetRaisedException(tstate); + } + const char *msg = error->override != NULL + ? error->override->msg + : error->uncaught.msg; + _set_xid_lookup_failure(tstate, NULL, msg, cause); + Py_XDECREF(cause); } else { // Raise an exception corresponding to the code. - assert(error->code != _PyXI_ERR_NO_ERROR); - (void)_PyXI_ApplyErrorCode(error->code, error->interp); - if (error->uncaught.type.name != NULL || error->uncaught.msg != NULL) { + (void)_PyXI_ApplyErrorCode(code, error->interp); + assert(error->override == NULL || error->override->msg == NULL); + if (excinfo_is_set(&error->uncaught)) { // __context__ will be set to a proxy of the propagated exception. - PyObject *exc = PyErr_GetRaisedException(); + // (or use PyExc_ExceptionSnapshot like _PyXI_ERR_NOT_SHAREABLE?) + PyObject *exc = _PyErr_GetRaisedException(tstate); _PyXI_excinfo_Apply(&error->uncaught, PyExc_InterpreterError); - PyObject *exc2 = PyErr_GetRaisedException(); + PyObject *exc2 = _PyErr_GetRaisedException(tstate); PyException_SetContext(exc, exc2); - PyErr_SetRaisedException(exc); + _PyErr_SetRaisedException(tstate, exc); } } assert(PyErr_Occurred()); @@ -2164,20 +2378,22 @@ _create_sharedns(PyObject *names) return NULL; } -static void _propagate_not_shareable_error(_PyXI_errcode *); +static void _propagate_not_shareable_error(PyThreadState *, + _PyXI_failure *); static int _fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, - xidata_fallback_t fallback, _PyXI_errcode *p_errcode) + xidata_fallback_t fallback, _PyXI_failure *p_err) { // All items are expected to be shareable. assert(_sharedns_check_counts(ns)); assert(ns->numnames == ns->maxitems); assert(ns->numvalues == 0); + PyThreadState *tstate = PyThreadState_Get(); for (Py_ssize_t i=0; i < ns->maxitems; i++) { if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj, fallback) < 0) { - if (p_errcode != NULL) { - _propagate_not_shareable_error(p_errcode); + if (p_err != NULL) { + _propagate_not_shareable_error(tstate, p_err); } // Clear out the ones we set so far. for (Py_ssize_t j=0; j < i; j++) { @@ -2244,18 +2460,6 @@ _apply_sharedns(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) /* switched-interpreter sessions */ /*********************************/ -struct xi_session_error { - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *override; - // This is set if exit captured an exception to propagate. - _PyXI_error *info; - - // -- pre-allocated memory -- - _PyXI_error _info; - _PyXI_errcode _override; -}; - struct xi_session { #define SESSION_UNUSED 0 #define SESSION_ACTIVE 1 @@ -2288,8 +2492,6 @@ struct xi_session { // once the session exits. Do not access this directly; use // _PyXI_Preserve() and _PyXI_GetPreserved() instead; PyObject *_preserved; - - struct xi_session_error error; }; _PyXI_session * @@ -2317,26 +2519,6 @@ _session_is_active(_PyXI_session *session) return session->status == SESSION_ACTIVE; } -static int -_session_pop_error(_PyXI_session *session, struct xi_session_error *err) -{ - if (session->error.info == NULL) { - assert(session->error.override == NULL); - *err = (struct xi_session_error){0}; - return 0; - } - *err = session->error; - err->info = &err->_info; - if (err->override != NULL) { - err->override = &err->_override; - } - session->error = (struct xi_session_error){0}; - return 1; -} - -static int _ensure_main_ns(_PyXI_session *, _PyXI_errcode *); -static inline void _session_set_error(_PyXI_session *, _PyXI_errcode); - /* enter/exit a cross-interpreter session */ @@ -2351,10 +2533,6 @@ _enter_session(_PyXI_session *session, PyInterpreterState *interp) // Set elsewhere and cleared in _exit_session(). assert(!session->running); assert(session->main_ns == NULL); - // Set elsewhere and cleared in _capture_current_exception(). - assert(session->error.override == NULL); - // Set elsewhere and cleared in _PyXI_Exit(). - assert(session->error.info == NULL); // Switch to interpreter. PyThreadState *tstate = PyThreadState_Get(); @@ -2409,17 +2587,14 @@ _exit_session(_PyXI_session *session) assert(!session->own_init_tstate); } - assert(session->error.info == NULL); - assert(session->error.override == _PyXI_ERR_NO_ERROR); - *session = (_PyXI_session){0}; } static void -_propagate_not_shareable_error(_PyXI_errcode *p_errcode) +_propagate_not_shareable_error(PyThreadState *tstate, + _PyXI_failure *override) { - assert(p_errcode != NULL); - PyThreadState *tstate = PyThreadState_Get(); + assert(override != NULL); PyObject *exctype = get_notshareableerror_type(tstate); if (exctype == NULL) { PyErr_FormatUnraisable( @@ -2428,15 +2603,24 @@ _propagate_not_shareable_error(_PyXI_errcode *p_errcode) } if (PyErr_ExceptionMatches(exctype)) { // We want to propagate the exception directly. - *p_errcode = _PyXI_ERR_NOT_SHAREABLE; + *override = (_PyXI_failure){ + .code = _PyXI_ERR_NOT_SHAREABLE, + }; } } + +static int _ensure_main_ns(_PyXI_session *, _PyXI_failure *); +static const char * capture_session_error(_PyXI_session *, _PyXI_error *, + _PyXI_failure *); + int _PyXI_Enter(_PyXI_session *session, PyInterpreterState *interp, PyObject *nsupdates, _PyXI_session_result *result) { + PyThreadState *tstate = _PyThreadState_GET(); + // Convert the attrs for cross-interpreter use. _PyXI_namespace *sharedns = NULL; if (nsupdates != NULL) { @@ -2457,16 +2641,16 @@ _PyXI_Enter(_PyXI_session *session, } // For now we limit it to shareable objects. xidata_fallback_t fallback = _PyXIDATA_XIDATA_ONLY; - _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; - if (_fill_sharedns(sharedns, nsupdates, fallback, &errcode) < 0) { - assert(PyErr_Occurred()); - assert(session->error.info == NULL); - if (errcode == _PyXI_ERR_NO_ERROR) { - errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + _PyXI_failure _err = XI_FAILURE_INIT; + if (_fill_sharedns(sharedns, nsupdates, fallback, &_err) < 0) { + assert(_PyErr_Occurred(tstate)); + if (_err.code == _PyXI_ERR_NO_ERROR) { + _err.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; } _destroy_sharedns(sharedns); if (result != NULL) { - result->errcode = errcode; + assert(_err.msg == NULL); + result->errcode = _err.code; } return -1; } @@ -2475,52 +2659,53 @@ _PyXI_Enter(_PyXI_session *session, // Switch to the requested interpreter (if necessary). _enter_session(session, interp); - _PyXI_errcode errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + _PyXI_failure override = XI_FAILURE_INIT; + override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + tstate = _PyThreadState_GET(); // Ensure this thread owns __main__. if (_PyInterpreterState_SetRunningMain(interp) < 0) { // In the case where we didn't switch interpreters, it would // be more efficient to leave the exception in place and return // immediately. However, life is simpler if we don't. - errcode = _PyXI_ERR_ALREADY_RUNNING; + override.code = _PyXI_ERR_ALREADY_RUNNING; goto error; } session->running = 1; // Apply the cross-interpreter data. if (sharedns != NULL) { - if (_ensure_main_ns(session, &errcode) < 0) { + if (_ensure_main_ns(session, &override) < 0) { goto error; } if (_apply_sharedns(sharedns, session->main_ns, NULL) < 0) { - errcode = _PyXI_ERR_APPLY_NS_FAILURE; + override.code = _PyXI_ERR_APPLY_NS_FAILURE; goto error; } _destroy_sharedns(sharedns); } - errcode = _PyXI_ERR_NO_ERROR; - assert(!PyErr_Occurred()); + override.code = _PyXI_ERR_NO_ERROR; + assert(!_PyErr_Occurred(tstate)); return 0; error: // We want to propagate all exceptions here directly (best effort). - assert(errcode != _PyXI_ERR_NO_ERROR); - _session_set_error(session, errcode); - assert(!PyErr_Occurred()); + assert(override.code != _PyXI_ERR_NO_ERROR); + _PyXI_error err = {0}; + const char *failure = capture_session_error(session, &err, &override); // Exit the session. - struct xi_session_error err; - (void)_session_pop_error(session, &err); _exit_session(session); + tstate = _PyThreadState_GET(); if (sharedns != NULL) { _destroy_sharedns(sharedns); } // Apply the error from the other interpreter. - PyObject *excinfo = _PyXI_ApplyError(err.info); - _PyXI_excinfo_clear(&err.info->uncaught); + PyObject *excinfo = _PyXI_ApplyError(&err, failure); + xi_error_clear(&err); if (excinfo != NULL) { if (result != NULL) { result->excinfo = excinfo; @@ -2529,84 +2714,100 @@ _PyXI_Enter(_PyXI_session *session, #ifdef Py_DEBUG fprintf(stderr, "_PyXI_Enter(): uncaught exception discarded"); #endif + Py_DECREF(excinfo); } } - assert(PyErr_Occurred()); + assert(_PyErr_Occurred(tstate)); return -1; } static int _pop_preserved(_PyXI_session *, _PyXI_namespace **, PyObject **, - _PyXI_errcode *); + _PyXI_failure *); static int _finish_preserved(_PyXI_namespace *, PyObject **); int -_PyXI_Exit(_PyXI_session *session, _PyXI_errcode errcode, +_PyXI_Exit(_PyXI_session *session, _PyXI_failure *override, _PyXI_session_result *result) { + PyThreadState *tstate = _PyThreadState_GET(); int res = 0; // Capture the raised exception, if any. - assert(session->error.info == NULL); - if (PyErr_Occurred()) { - _session_set_error(session, errcode); - assert(!PyErr_Occurred()); + _PyXI_error err = {0}; + const char *failure = NULL; + if (override != NULL && override->code == _PyXI_ERR_NO_ERROR) { + assert(override->msg == NULL); + override = NULL; + } + if (_PyErr_Occurred(tstate)) { + failure = capture_session_error(session, &err, override); } else { - assert(errcode == _PyXI_ERR_NO_ERROR); - assert(session->error.override == NULL); + assert(override == NULL); } // Capture the preserved namespace. _PyXI_namespace *preserved = NULL; PyObject *preservedobj = NULL; if (result != NULL) { - errcode = _PyXI_ERR_NO_ERROR; - if (_pop_preserved(session, &preserved, &preservedobj, &errcode) < 0) { - if (session->error.info != NULL) { + assert(!_PyErr_Occurred(tstate)); + _PyXI_failure _override = XI_FAILURE_INIT; + if (_pop_preserved( + session, &preserved, &preservedobj, &_override) < 0) + { + assert(preserved == NULL); + assert(preservedobj == NULL); + if (xi_error_is_set(&err)) { // XXX Chain the exception (i.e. set __context__)? PyErr_FormatUnraisable( "Exception ignored while capturing preserved objects"); + clear_xi_failure(&_override); } else { - _session_set_error(session, errcode); + if (_override.code == _PyXI_ERR_NO_ERROR) { + _override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } + failure = capture_session_error(session, &err, &_override); } } } // Exit the session. - struct xi_session_error err; - (void)_session_pop_error(session, &err); + assert(!_PyErr_Occurred(tstate)); _exit_session(session); + tstate = _PyThreadState_GET(); // Restore the preserved namespace. assert(preserved == NULL || preservedobj == NULL); if (_finish_preserved(preserved, &preservedobj) < 0) { assert(preservedobj == NULL); - if (err.info != NULL) { + if (xi_error_is_set(&err)) { // XXX Chain the exception (i.e. set __context__)? PyErr_FormatUnraisable( "Exception ignored while capturing preserved objects"); } else { - errcode = _PyXI_ERR_PRESERVE_FAILURE; - _propagate_not_shareable_error(&errcode); + xi_error_set_override_code( + tstate, &err, _PyXI_ERR_PRESERVE_FAILURE); + _propagate_not_shareable_error(tstate, err.override); } } if (result != NULL) { result->preserved = preservedobj; - result->errcode = errcode; + result->errcode = err.override != NULL + ? err.override->code + : _PyXI_ERR_NO_ERROR; } // Apply the error from the other interpreter, if any. - if (err.info != NULL) { + if (xi_error_is_set(&err)) { res = -1; - assert(!PyErr_Occurred()); - PyObject *excinfo = _PyXI_ApplyError(err.info); - _PyXI_excinfo_clear(&err.info->uncaught); + assert(!_PyErr_Occurred(tstate)); + PyObject *excinfo = _PyXI_ApplyError(&err, failure); if (excinfo == NULL) { - assert(PyErr_Occurred()); - if (result != NULL) { + assert(_PyErr_Occurred(tstate)); + if (result != NULL && !xi_error_has_override(&err)) { _PyXI_ClearResult(result); *result = (_PyXI_session_result){ .errcode = _PyXI_ERR_EXC_PROPAGATION_FAILURE, @@ -2620,7 +2821,9 @@ _PyXI_Exit(_PyXI_session *session, _PyXI_errcode errcode, #ifdef Py_DEBUG fprintf(stderr, "_PyXI_Exit(): uncaught exception discarded"); #endif + Py_DECREF(excinfo); } + xi_error_clear(&err); } return res; } @@ -2628,106 +2831,72 @@ _PyXI_Exit(_PyXI_session *session, _PyXI_errcode errcode, /* in an active cross-interpreter session */ -static void -_capture_current_exception(_PyXI_session *session) +static const char * +capture_session_error(_PyXI_session *session, _PyXI_error *err, + _PyXI_failure *override) { - assert(session->error.info == NULL); - if (!PyErr_Occurred()) { - assert(session->error.override == NULL); - return; - } - - // Handle the exception override. - _PyXI_errcode *override = session->error.override; - session->error.override = NULL; - _PyXI_errcode errcode = override != NULL - ? *override - : _PyXI_ERR_UNCAUGHT_EXCEPTION; + assert(_session_is_active(session)); + assert(!xi_error_is_set(err)); + PyThreadState *tstate = session->init_tstate; - // Pop the exception object. - PyObject *excval = NULL; - if (errcode == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // We want to actually capture the current exception. - excval = PyErr_GetRaisedException(); - } - else if (errcode == _PyXI_ERR_ALREADY_RUNNING) { - // We don't need the exception info. - PyErr_Clear(); - } - else { - // We could do a variety of things here, depending on errcode. - // However, for now we simply capture the exception and save - // the errcode. - excval = PyErr_GetRaisedException(); + // Normalize the exception override. + if (override != NULL) { + if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + assert(override->msg == NULL); + override = NULL; + } + else { + assert(override->code != _PyXI_ERR_NO_ERROR); + } } - // Capture the exception. - _PyXI_error *err = &session->error._info; - *err = (_PyXI_error){ - .interp = session->init_tstate->interp, - }; - const char *failure; - if (excval == NULL) { - failure = _PyXI_InitError(err, NULL, errcode); - } - else { - failure = _PyXI_InitError(err, excval, _PyXI_ERR_UNCAUGHT_EXCEPTION); - Py_DECREF(excval); - if (failure == NULL && override != NULL) { - err->code = errcode; + // Handle the exception, if any. + const char *failure = NULL; + PyObject *exc = xi_error_resolve_current_exc(tstate, override); + if (exc != NULL) { + // There is an unhandled exception we need to preserve. + failure = xi_error_set_exc(tstate, err, exc); + Py_DECREF(exc); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable(failure); } } - // Handle capture failure. - if (failure != NULL) { - // XXX Make this error message more generic. - fprintf(stderr, - "RunFailedError: script raised an uncaught exception (%s)", - failure); - err = NULL; + // Handle the override. + if (override != NULL && failure == NULL) { + xi_error_set_override(tstate, err, override); } // Finished! - assert(!PyErr_Occurred()); - session->error.info = err; -} - -static inline void -_session_set_error(_PyXI_session *session, _PyXI_errcode errcode) -{ - assert(_session_is_active(session)); - assert(PyErr_Occurred()); - if (errcode == _PyXI_ERR_NO_ERROR) { - // We're a bit forgiving here. - errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; - } - if (errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION) { - session->error._override = errcode; - session->error.override = &session->error._override; - } - _capture_current_exception(session); + assert(!_PyErr_Occurred(tstate)); + return failure; } static int -_ensure_main_ns(_PyXI_session *session, _PyXI_errcode *p_errcode) +_ensure_main_ns(_PyXI_session *session, _PyXI_failure *failure) { assert(_session_is_active(session)); + PyThreadState *tstate = session->init_tstate; if (session->main_ns != NULL) { return 0; } // Cache __main__.__dict__. - PyObject *main_mod = _Py_GetMainModule(session->init_tstate); + PyObject *main_mod = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(main_mod) < 0) { - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_MAIN_NS_FAILURE; + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; } return -1; } PyObject *ns = PyModule_GetDict(main_mod); // borrowed Py_DECREF(main_mod); if (ns == NULL) { - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_MAIN_NS_FAILURE; + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; } return -1; } @@ -2736,13 +2905,13 @@ _ensure_main_ns(_PyXI_session *session, _PyXI_errcode *p_errcode) } PyObject * -_PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_errcode *p_errcode) +_PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_failure *failure) { if (!_session_is_active(session)) { PyErr_SetString(PyExc_RuntimeError, "session not active"); return NULL; } - if (_ensure_main_ns(session, p_errcode) < 0) { + if (_ensure_main_ns(session, failure) < 0) { return NULL; } return session->main_ns; @@ -2752,9 +2921,12 @@ _PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_errcode *p_errcode) static int _pop_preserved(_PyXI_session *session, _PyXI_namespace **p_xidata, PyObject **p_obj, - _PyXI_errcode *p_errcode) + _PyXI_failure *p_failure) { + _PyXI_failure failure = XI_FAILURE_INIT; + _PyXI_namespace *xidata = NULL; assert(_PyThreadState_GET() == session->init_tstate); // active session + if (session->_preserved == NULL) { *p_xidata = NULL; *p_obj = NULL; @@ -2772,10 +2944,8 @@ _pop_preserved(_PyXI_session *session, // We did switch interpreters. Py_ssize_t len = PyDict_Size(session->_preserved); if (len < 0) { - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; - } - return -1; + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; } else if (len == 0) { *p_xidata = NULL; @@ -2783,29 +2953,31 @@ _pop_preserved(_PyXI_session *session, else { _PyXI_namespace *xidata = _create_sharedns(session->_preserved); if (xidata == NULL) { - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; - } - return -1; + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; } - _PyXI_errcode errcode = _PyXI_ERR_NO_ERROR; if (_fill_sharedns(xidata, session->_preserved, - _PyXIDATA_FULL_FALLBACK, &errcode) < 0) + _PyXIDATA_FULL_FALLBACK, &failure) < 0) { - assert(session->error.info == NULL); - if (errcode != _PyXI_ERR_NOT_SHAREABLE) { - errcode = _PyXI_ERR_PRESERVE_FAILURE; + if (failure.code != _PyXI_ERR_NOT_SHAREABLE) { + assert(failure.msg != NULL); + failure.code = _PyXI_ERR_PRESERVE_FAILURE; } - if (p_errcode != NULL) { - *p_errcode = errcode; - } - _destroy_sharedns(xidata); - return -1; + goto error; } *p_xidata = xidata; } Py_CLEAR(session->_preserved); return 0; + +error: + if (p_failure != NULL) { + *p_failure = failure; + } + if (xidata != NULL) { + _destroy_sharedns(xidata); + } + return -1; } static int @@ -2835,8 +3007,9 @@ _finish_preserved(_PyXI_namespace *xidata, PyObject **p_preserved) int _PyXI_Preserve(_PyXI_session *session, const char *name, PyObject *value, - _PyXI_errcode *p_errcode) + _PyXI_failure *p_failure) { + _PyXI_failure failure = XI_FAILURE_INIT; if (!_session_is_active(session)) { PyErr_SetString(PyExc_RuntimeError, "session not active"); return -1; @@ -2846,20 +3019,22 @@ _PyXI_Preserve(_PyXI_session *session, const char *name, PyObject *value, if (session->_preserved == NULL) { set_exc_with_cause(PyExc_RuntimeError, "failed to initialize preserved objects"); - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; - } - return -1; + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; } } if (PyDict_SetItemString(session->_preserved, name, value) < 0) { set_exc_with_cause(PyExc_RuntimeError, "failed to preserve object"); - if (p_errcode != NULL) { - *p_errcode = _PyXI_ERR_PRESERVE_FAILURE; - } - return -1; + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; } return 0; + +error: + if (p_failure != NULL) { + *p_failure = failure; + } + return -1; } PyObject * diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index b16f38b847fc66..6d0b93eb82ac1e 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -88,6 +88,33 @@ _PyXIData_FormatNotShareableError(PyThreadState *tstate, va_end(vargs); } +int +_PyXI_UnwrapNotShareableError(PyThreadState * tstate, _PyXI_failure *failure) +{ + PyObject *exctype = get_notshareableerror_type(tstate); + assert(exctype != NULL); + if (!_PyErr_ExceptionMatches(tstate, exctype)) { + return -1; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (failure != NULL) { + _PyXI_errcode code = _PyXI_ERR_NOT_SHAREABLE; + if (_PyXI_InitFailure(failure, code, exc) < 0) { + return -1; + } + } + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; + } + else { + assert(PyException_GetContext(exc) == NULL); + } + _PyErr_SetRaisedException(tstate, exc); + return 0; +} + _PyXIData_getdata_t _PyXIData_Lookup(PyThreadState *tstate, PyObject *obj) diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index 12cd61db1b6762..98411adc5eb3f6 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -7,13 +7,6 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) } PyObject *exc = _PyErr_GetRaisedException(tstate); assert(exc != NULL); - PyObject *ctx = PyException_GetContext(exc); - if (ctx == NULL) { - PyException_SetContext(exc, Py_NewRef(cause)); - } - else { - Py_DECREF(ctx); - } assert(PyException_GetCause(exc) == NULL); PyException_SetCause(exc, Py_NewRef(cause)); _PyErr_SetRaisedException(tstate, exc); From 56eabea056ae1da49b55e0f794c9957008f8d20a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 13 Jun 2025 16:47:49 -0600 Subject: [PATCH 559/989] gh-135437: Account For Duplicate Names in _PyCode_SetUnboundVarCounts() (gh-135438) --- Lib/test/_code_definitions.py | 10 ++++++++++ Lib/test/test_code.py | 5 +++++ Objects/codeobject.c | 30 +++++++++++++++++++++++++----- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Lib/test/_code_definitions.py b/Lib/test/_code_definitions.py index 274beb65a6d0f4..70c44da2ec6382 100644 --- a/Lib/test/_code_definitions.py +++ b/Lib/test/_code_definitions.py @@ -57,6 +57,13 @@ def spam_with_globals_and_builtins(): print(res) +def spam_with_global_and_attr_same_name(): + try: + spam_minimal.spam_minimal + except AttributeError: + pass + + def spam_full_args(a, b, /, c, d, *args, e, f, **kwargs): return (a, b, c, d, e, f, args, kwargs) @@ -190,6 +197,7 @@ def ham_C_closure(z): spam_minimal, spam_with_builtins, spam_with_globals_and_builtins, + spam_with_global_and_attr_same_name, spam_full_args, spam_full_args_with_defaults, spam_args_attrs_and_builtins, @@ -258,6 +266,7 @@ def ham_C_closure(z): script_with_globals, spam_full_args_with_defaults, spam_with_globals_and_builtins, + spam_with_global_and_attr_same_name, spam_full, ] @@ -275,6 +284,7 @@ def ham_C_closure(z): *PURE_SCRIPT_FUNCTIONS, script_with_globals, spam_with_globals_and_builtins, + spam_with_global_and_attr_same_name, ] diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 9fc2b047bef719..655f5a9be7fa31 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -701,6 +701,7 @@ def test_local_kinds(self): 'checks': CO_FAST_LOCAL, 'res': CO_FAST_LOCAL, }, + defs.spam_with_global_and_attr_same_name: {}, defs.spam_full_args: { 'a': POSONLY, 'b': POSONLY, @@ -955,6 +956,10 @@ def new_var_counts(*, purelocals=5, globalvars=6, ), + defs.spam_with_global_and_attr_same_name: new_var_counts( + globalvars=2, + attrs=1, + ), defs.spam_full_args: new_var_counts( posonly=2, posorkw=2, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ee869d991d93cd..34b50ef97d544b 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1714,7 +1714,7 @@ static int identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, PyObject *globalnames, PyObject *attrnames, PyObject *globalsns, PyObject *builtinsns, - struct co_unbound_counts *counts) + struct co_unbound_counts *counts, int *p_numdupes) { // This function is inspired by inspect.getclosurevars(). // It would be nicer if we had something similar to co_localspluskinds, @@ -1729,6 +1729,7 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, assert(builtinsns == NULL || PyDict_Check(builtinsns)); assert(counts == NULL || counts->total == 0); struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_ssize_t len = Py_SIZE(co); for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); @@ -1747,6 +1748,12 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(attrnames, name) < 0) { return -1; } + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } else if (inst.op.code == LOAD_GLOBAL) { int oparg = GET_OPARG(co, i, inst.op.arg); @@ -1778,11 +1785,20 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(globalnames, name) < 0) { return -1; } + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } } if (counts != NULL) { *counts = unbound; } + if (p_numdupes != NULL) { + *p_numdupes = numdupes; + } return 0; } @@ -1932,20 +1948,24 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, // Fill in unbound.globals and unbound.numattrs. struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_BEGIN_CRITICAL_SECTION(co); res = identify_unbound_names( tstate, co, globalnames, attrnames, globalsns, builtinsns, - &unbound); + &unbound, &numdupes); Py_END_CRITICAL_SECTION(); if (res < 0) { goto finally; } assert(unbound.numunknown == 0); - assert(unbound.total <= counts->unbound.total); + assert(unbound.total - numdupes <= counts->unbound.total); assert(counts->unbound.numunknown == counts->unbound.total); - unbound.numunknown = counts->unbound.total - unbound.total; - unbound.total = counts->unbound.total; + // There may be a name that is both a global and an attr. + int totalunbound = counts->unbound.total + numdupes; + unbound.numunknown = totalunbound - unbound.total; + unbound.total = totalunbound; counts->unbound = unbound; + counts->total += numdupes; res = 0; finally: From c2bb3f9843bc4763d6d41e883dbe9525f5155a4a Mon Sep 17 00:00:00 2001 From: GiGaGon <107241144+MeGaGiGaGon@users.noreply.github.com> Date: Fri, 13 Jun 2025 21:40:42 -0700 Subject: [PATCH 560/989] gh-135496: Fix f string exclamation mark error typo (#135495) --- Lib/test/test_fstring.py | 2 +- .../2025-06-14-01-01-14.gh-issue-135496.ER0Me3.rst | 1 + Parser/action_helpers.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-14-01-01-14.gh-issue-135496.ER0Me3.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index dd58e032a8befe..f54557056784f2 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1380,7 +1380,7 @@ def test_conversions(self): for conv in ' s', ' s ': self.assertAllRaise(SyntaxError, "f-string: conversion type must come right after the" - " exclamanation mark", + " exclamation mark", ["f'{3!" + conv + "}'"]) self.assertAllRaise(SyntaxError, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-14-01-01-14.gh-issue-135496.ER0Me3.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-14-01-01-14.gh-issue-135496.ER0Me3.rst new file mode 100644 index 00000000000000..03b1f4590c5846 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-14-01-01-14.gh-issue-135496.ER0Me3.rst @@ -0,0 +1 @@ +Fix typo in the f-string conversion type error ("exclamanation" -> "exclamation"). diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 3bcc0870882a29..0d362bf7a9111a 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -965,7 +965,7 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) { return RAISE_SYNTAX_ERROR_KNOWN_RANGE( conv_token, conv, - "%c-string: conversion type must come right after the exclamanation mark", + "%c-string: conversion type must come right after the exclamation mark", TOK_GET_STRING_PREFIX(p->tok) ); } From c8319a3fea9ff7f9b49008be3b5d681112bbe7f3 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 14 Jun 2025 11:46:43 +0300 Subject: [PATCH 561/989] gh-135368: Fix mocks on dataclass specs with `instance=True` (#135421) * gh-135368: Fix mocks on dataclass specs with `instance=True` * Extend dataclass mock_methods --------- Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- .../test_unittest/testmock/testhelpers.py | 21 +++++++++++++++++++ Lib/unittest/mock.py | 13 ++++++++++-- ...-06-12-10-45-02.gh-issue-135368.OjWVHL.rst | 2 ++ 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-12-10-45-02.gh-issue-135368.OjWVHL.rst diff --git a/Lib/test/test_unittest/testmock/testhelpers.py b/Lib/test/test_unittest/testmock/testhelpers.py index d1e48bde982040..0e82c723ec3eaa 100644 --- a/Lib/test/test_unittest/testmock/testhelpers.py +++ b/Lib/test/test_unittest/testmock/testhelpers.py @@ -1050,6 +1050,7 @@ def __post_init__(self): create_autospec(WithPostInit()), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithPostInit) self.assertIsInstance(mock.a, int) self.assertIsInstance(mock.b, int) @@ -1072,6 +1073,7 @@ class WithDefault: create_autospec(WithDefault(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithDefault) self.assertIsInstance(mock.a, int) self.assertIsInstance(mock.b, int) @@ -1087,6 +1089,7 @@ def b(self) -> int: create_autospec(WithMethod(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithMethod) self.assertIsInstance(mock.a, int) mock.b.assert_not_called() @@ -1102,11 +1105,29 @@ class WithNonFields: create_autospec(WithNonFields(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithNonFields) with self.assertRaisesRegex(AttributeError, msg): mock.a with self.assertRaisesRegex(AttributeError, msg): mock.b + def test_dataclass_special_attrs(self): + @dataclass + class Description: + name: str + + for mock in [ + create_autospec(Description, instance=True), + create_autospec(Description(1)), + ]: + with self.subTest(mock=mock): + self.assertIsInstance(mock, Description) + self.assertIs(mock.__class__, Description) + self.assertIsInstance(mock.__dataclass_fields__, MagicMock) + self.assertIsInstance(mock.__dataclass_params__, MagicMock) + self.assertIsInstance(mock.__match_args__, MagicMock) + self.assertIsInstance(mock.__hash__, MagicMock) + class TestCallList(unittest.TestCase): def test_args_list_contains_call_list(self): diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index 55cb4b1f6aff90..e370aa48b7c703 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -569,6 +569,11 @@ def _mock_add_spec(self, spec, spec_set, _spec_as_instance=False, __dict__['_mock_methods'] = spec __dict__['_spec_asyncs'] = _spec_asyncs + def _mock_extend_spec_methods(self, spec_methods): + methods = self.__dict__.get('_mock_methods') or [] + methods.extend(spec_methods) + self.__dict__['_mock_methods'] = methods + def __get_return_value(self): ret = self._mock_return_value if self._mock_delegate is not None: @@ -2766,14 +2771,16 @@ def create_autospec(spec, spec_set=False, instance=False, _parent=None, raise InvalidSpecError(f'Cannot autospec a Mock object. ' f'[object={spec!r}]') is_async_func = _is_async_func(spec) + _kwargs = {'spec': spec} entries = [(entry, _missing) for entry in dir(spec)] if is_type and instance and is_dataclass(spec): + is_dataclass_spec = True dataclass_fields = fields(spec) entries.extend((f.name, f.type) for f in dataclass_fields) - _kwargs = {'spec': [f.name for f in dataclass_fields]} + dataclass_spec_list = [f.name for f in dataclass_fields] else: - _kwargs = {'spec': spec} + is_dataclass_spec = False if spec_set: _kwargs = {'spec_set': spec} @@ -2810,6 +2817,8 @@ def create_autospec(spec, spec_set=False, instance=False, _parent=None, mock = Klass(parent=_parent, _new_parent=_parent, _new_name=_new_name, name=_name, **_kwargs) + if is_dataclass_spec: + mock._mock_extend_spec_methods(dataclass_spec_list) if isinstance(spec, FunctionTypes): # should only happen at the top level because we don't diff --git a/Misc/NEWS.d/next/Library/2025-06-12-10-45-02.gh-issue-135368.OjWVHL.rst b/Misc/NEWS.d/next/Library/2025-06-12-10-45-02.gh-issue-135368.OjWVHL.rst new file mode 100644 index 00000000000000..b9973d88a859c6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-12-10-45-02.gh-issue-135368.OjWVHL.rst @@ -0,0 +1,2 @@ +Fix :class:`unittest.mock.Mock` generation on :func:`dataclasses.dataclass` +objects. Now all special attributes are set as it was before :gh:`124429`. From 7b15873ed00766ef6760048449f44ea0e41f4a12 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 14 Jun 2025 17:13:32 +0800 Subject: [PATCH 562/989] gh-135474: Specialize arithmetic only on compact ints (GH-135479) Specialize arithmetic only on compact ints. This also makes int operations non-escaping in the JIT and in tier 1. --- Doc/howto/perf_profiling.rst | 4 +- Include/internal/pycore_long.h | 6 +- Include/internal/pycore_opcode_metadata.h | 6 +- Include/internal/pycore_uop_metadata.h | 6 +- ...-06-13-16-05-24.gh-issue-135474.67nOl3.rst | 1 + Objects/longobject.c | 17 ++-- Python/bytecodes.c | 9 ++- Python/executor_cases.c.h | 24 +++--- Python/generated_cases.c.h | 27 ++++--- Python/optimizer_bytecodes.c | 51 +----------- Python/optimizer_cases.c.h | 81 +++---------------- Tools/cases_generator/analyzer.py | 4 + 12 files changed, 81 insertions(+), 155 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-13-16-05-24.gh-issue-135474.67nOl3.rst diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index b579d776576365..96d757ac452b5e 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -92,7 +92,7 @@ Then we can use ``perf report`` to analyze the data: | | | | | | | |--51.67%--_PyEval_EvalFrameDefault | | | | | - | | | | |--11.52%--_PyLong_Add + | | | | |--11.52%--_PyCompactLong_Add | | | | | | | | | | | |--2.97%--_PyObject_Malloc ... @@ -142,7 +142,7 @@ Instead, if we run the same experiment with ``perf`` support enabled we get: | | | | | | | |--51.81%--_PyEval_EvalFrameDefault | | | | | - | | | | |--13.77%--_PyLong_Add + | | | | |--13.77%--_PyCompactLong_Add | | | | | | | | | | | |--3.26%--_PyObject_Malloc diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 3196d1b82084b9..b8efba74bdc421 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -112,9 +112,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); -PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right); -PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right); -PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right); // Export for 'binascii' shared extension. PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 00e918cb8f0cd1..59c85ddeb3d663 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1072,12 +1072,12 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[267]; const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, @@ -1085,7 +1085,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index b08909e72c4f43..cd36023c25cbb4 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -85,9 +85,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_INT] = HAS_EXIT_FLAG, [_GUARD_TOS_INT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-13-16-05-24.gh-issue-135474.67nOl3.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-13-16-05-24.gh-issue-135474.67nOl3.rst new file mode 100644 index 00000000000000..716d9b78748fa2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-13-16-05-24.gh-issue-135474.67nOl3.rst @@ -0,0 +1 @@ +Specialize integer operations only on compact integers. This is a CPython internal change. diff --git a/Objects/longobject.c b/Objects/longobject.c index 2b533312fee673..dfa02851cd8877 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3772,9 +3772,11 @@ long_add(PyLongObject *a, PyLongObject *b) } PyObject * -_PyLong_Add(PyLongObject *a, PyLongObject *b) +_PyCompactLong_Add(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_add(a, b); + assert(_PyLong_BothAreCompact(a, b)); + stwodigits z = medium_value(a) + medium_value(b); + return (PyObject *)_PyLong_FromSTwoDigits(z); } static PyObject * @@ -3815,9 +3817,10 @@ long_sub(PyLongObject *a, PyLongObject *b) } PyObject * -_PyLong_Subtract(PyLongObject *a, PyLongObject *b) +_PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_sub(a, b); + assert(_PyLong_BothAreCompact(a, b)); + return (PyObject *)_PyLong_FromSTwoDigits(medium_value(a) - medium_value(b)); } static PyObject * @@ -4262,9 +4265,11 @@ long_mul(PyLongObject *a, PyLongObject *b) } PyObject * -_PyLong_Multiply(PyLongObject *a, PyLongObject *b) +_PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_mul(a, b); + assert(_PyLong_BothAreCompact(a, b)); + stwodigits v = medium_value(a) * medium_value(b); + return (PyObject *)_PyLong_FromSTwoDigits(v); } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 032e76f72af42c..971e97a5784692 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -582,9 +582,10 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); @@ -597,9 +598,10 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); @@ -612,9 +614,10 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4f772f916d1152..dbfb2391bf0623 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -880,10 +880,12 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { @@ -908,10 +910,12 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { @@ -936,10 +940,12 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5ac519bb1b6093..2cf027c539b992 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -182,10 +182,13 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { @@ -507,10 +510,13 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { @@ -1088,10 +1094,13 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index babd3e46b8d1da..c2469547d77943 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -222,60 +222,15 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! - } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + res = sym_new_type(ctx, &PyLong_Type); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! - } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + res = sym_new_type(ctx, &PyLong_Type); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! - } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + res = sym_new_type(ctx, &PyLong_Type); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index adab110c5ced66..d9313be0bb0df6 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -319,86 +319,29 @@ } case _BINARY_OP_MULTIPLY_INT: { - JitOptSymbol *right; - JitOptSymbol *left; JitOptSymbol *res; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - stack_pointer[-2] = res; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; - } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyLong_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_ADD_INT: { - JitOptSymbol *right; - JitOptSymbol *left; JitOptSymbol *res; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - stack_pointer[-2] = res; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; - } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyLong_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_SUBTRACT_INT: { - JitOptSymbol *right; - JitOptSymbol *left; JitOptSymbol *res; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - stack_pointer[-2] = res; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; - } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyLong_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index fca9b29f9ebc2e..ca6d0301f3572d 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -635,6 +635,10 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "_PyLong_IsNegative", "_PyLong_IsNonNegativeCompact", "_PyLong_IsZero", + "_PyLong_BothAreCompact", + "_PyCompactLong_Add", + "_PyCompactLong_Multiply", + "_PyCompactLong_Subtract", "_PyManagedDictPointer_IsValues", "_PyObject_GC_IS_SHARED", "_PyObject_GC_IS_TRACKED", From 028309fb47869b665f55d10e9eabf7952bf7dbd3 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 14 Jun 2025 13:48:25 +0100 Subject: [PATCH 563/989] gh-135371: Fix asyncio introspection output to include internal coroutine chains (#135436) --- Doc/whatsnew/3.14.rst | 59 +- Lib/asyncio/tools.py | 162 +- Lib/test/test_asyncio/test_tools.py | 1560 +++++++++++++---- Lib/test/test_external_inspection.py | 395 +++-- ...-06-12-18-12-42.gh-issue-135371.R_YUtR.rst | 4 + Modules/_remote_debugging_module.c | 339 +++- 6 files changed, 1847 insertions(+), 672 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-18-12-42.gh-issue-135371.R_YUtR.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index ca330a32b33c4b..705bf46d603697 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -816,43 +816,58 @@ Executing the new tool on the running process will yield a table like this: python -m asyncio ps 12345 - tid task id task name coroutine chain awaiter name awaiter id - --------------------------------------------------------------------------------------------------------------------------------------- - 8138752 0x564bd3d0210 Task-1 0x0 - 8138752 0x564bd3d0410 Sundowning _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0610 TMBTE _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0810 TNDNBTG _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3d0a10 Levitate _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3e0550 DYWTYLM _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 - 8138752 0x564bd3e0710 Aqua Regia _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 - - -or: + tid task id task name coroutine stack awaiter chain awaiter name awaiter id + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + 1935500 0x7fc930c18050 Task-1 TaskGroup._aexit -> TaskGroup.__aexit__ -> main 0x0 + 1935500 0x7fc930c18230 Sundowning TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fa50 TMBTE TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fdf0 TNDNBTG sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32510 Levitate sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32890 DYWTYLM sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 + 1935500 0x7fc93161ec30 Aqua Regia sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 + +or a tree like this: .. code-block:: bash python -m asyncio pstree 12345 └── (T) Task-1 - └── main - └── __aexit__ - └── _aexit + └── main example.py:13 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) Sundowning - │ └── album - │ └── __aexit__ - │ └── _aexit + │ └── album example.py:8 + │ └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + │ └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 │ ├── (T) TNDNBTG + │ │ └── play example.py:4 + │ │ └── sleep Lib/asyncio/tasks.py:702 │ └── (T) Levitate + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) TMBTE - └── album - └── __aexit__ - └── _aexit + └── album example.py:8 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) DYWTYLM + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) Aqua Regia + └── play example.py:4 + └── sleep Lib/asyncio/tasks.py:702 If a cycle is detected in the async await graph (which could indicate a programming issue), the tool raises an error and lists the cycle paths that -prevent tree construction. +prevent tree construction: + +.. code-block:: bash + + python -m asyncio pstree 12345 + + ERROR: await-graph contains cycles - cannot print a tree! + + cycle: Task-2 → Task-3 → Task-2 (Contributed by Pablo Galindo, Łukasz Langa, Yury Selivanov, and Marta Gomez Macias in :gh:`91048`.) diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index 3fc4524c008db6..2683f34cc7113b 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,11 +1,10 @@ """Tools to analyze tasks running in asyncio programs.""" -from collections import defaultdict +from collections import defaultdict, namedtuple from itertools import count from enum import Enum import sys -from _remote_debugging import RemoteUnwinder - +from _remote_debugging import RemoteUnwinder, FrameInfo class NodeType(Enum): COROUTINE = 1 @@ -26,51 +25,75 @@ def __init__( # ─── indexing helpers ─────────────────────────────────────────── -def _format_stack_entry(elem: tuple[str, str, int] | str) -> str: - if isinstance(elem, tuple): - fqname, path, line_no = elem - return f"{fqname} {path}:{line_no}" - +def _format_stack_entry(elem: str|FrameInfo) -> str: + if not isinstance(elem, str): + if elem.lineno == 0 and elem.filename == "": + return f"{elem.funcname}" + else: + return f"{elem.funcname} {elem.filename}:{elem.lineno}" return elem def _index(result): - id2name, awaits = {}, [] - for _thr_id, tasks in result: - for tid, tname, awaited in tasks: - id2name[tid] = tname - for stack, parent_id in awaited: - stack = [_format_stack_entry(elem) for elem in stack] - awaits.append((parent_id, stack, tid)) - return id2name, awaits - - -def _build_tree(id2name, awaits): + id2name, awaits, task_stacks = {}, [], {} + for awaited_info in result: + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_name = task_info.task_name + id2name[task_id] = task_name + + # Store the internal coroutine stack for this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + call_stack = coro_info.call_stack + internal_stack = [_format_stack_entry(frame) for frame in call_stack] + task_stacks[task_id] = internal_stack + + # Add the awaited_by relationships (external dependencies) + if task_info.awaited_by: + for coro_info in task_info.awaited_by: + call_stack = coro_info.call_stack + parent_task_id = coro_info.task_name + stack = [_format_stack_entry(frame) for frame in call_stack] + awaits.append((parent_task_id, stack, task_id)) + return id2name, awaits, task_stacks + + +def _build_tree(id2name, awaits, task_stacks): id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} children = defaultdict(list) - cor_names = defaultdict(dict) # (parent) -> {frame: node} - cor_id_seq = count(1) - - def _cor_node(parent_key, frame_name): - """Return an existing or new (NodeType.COROUTINE, …) node under *parent_key*.""" - bucket = cor_names[parent_key] - if frame_name in bucket: - return bucket[frame_name] - node_key = (NodeType.COROUTINE, f"c{next(cor_id_seq)}") - id2label[node_key] = frame_name - children[parent_key].append(node_key) - bucket[frame_name] = node_key + cor_nodes = defaultdict(dict) # Maps parent -> {frame_name: node_key} + next_cor_id = count(1) + + def get_or_create_cor_node(parent, frame): + """Get existing coroutine node or create new one under parent""" + if frame in cor_nodes[parent]: + return cor_nodes[parent][frame] + + node_key = (NodeType.COROUTINE, f"c{next(next_cor_id)}") + id2label[node_key] = frame + children[parent].append(node_key) + cor_nodes[parent][frame] = node_key return node_key - # lay down parent ➜ …frames… ➜ child paths + # Build task dependency tree with coroutine frames for parent_id, stack, child_id in awaits: cur = (NodeType.TASK, parent_id) - for frame in reversed(stack): # outer-most → inner-most - cur = _cor_node(cur, frame) + for frame in reversed(stack): + cur = get_or_create_cor_node(cur, frame) + child_key = (NodeType.TASK, child_id) if child_key not in children[cur]: children[cur].append(child_key) + # Add coroutine stacks for leaf tasks + awaiting_tasks = {parent_id for parent_id, _, _ in awaits} + for task_id in id2name: + if task_id not in awaiting_tasks and task_id in task_stacks: + cur = (NodeType.TASK, task_id) + for frame in reversed(task_stacks[task_id]): + cur = get_or_create_cor_node(cur, frame) + return id2label, children @@ -129,12 +152,12 @@ def build_async_tree(result, task_emoji="(T)", cor_emoji=""): The call tree is produced by `get_all_async_stacks()`, prefixing tasks with `task_emoji` and coroutine frames with `cor_emoji`. """ - id2name, awaits = _index(result) + id2name, awaits, task_stacks = _index(result) g = _task_graph(awaits) cycles = _find_cycles(g) if cycles: raise CycleFoundException(cycles, id2name) - labels, children = _build_tree(id2name, awaits) + labels, children = _build_tree(id2name, awaits, task_stacks) def pretty(node): flag = task_emoji if node[0] == NodeType.TASK else cor_emoji @@ -154,35 +177,40 @@ def render(node, prefix="", last=True, buf=None): def build_task_table(result): - id2name, awaits = _index(result) + id2name, _, _ = _index(result) table = [] - for tid, tasks in result: - for task_id, task_name, awaited in tasks: - if not awaited: - table.append( - [ - tid, - hex(task_id), - task_name, - "", - "", - "0x0" - ] - ) - for stack, awaiter_id in awaited: - stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack] - coroutine_chain = " -> ".join(stack) - awaiter_name = id2name.get(awaiter_id, "Unknown") - table.append( - [ - tid, - hex(task_id), - task_name, - coroutine_chain, - awaiter_name, - hex(awaiter_id), - ] - ) + + for awaited_info in result: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + # Get task info + task_id = task_info.task_id + task_name = task_info.task_name + + # Build coroutine stack string + frames = [frame for coro in task_info.coroutine_stack + for frame in coro.call_stack] + coro_stack = " -> ".join(_format_stack_entry(x).split(" ")[0] + for x in frames) + + # Handle tasks with no awaiters + if not task_info.awaited_by: + table.append([thread_id, hex(task_id), task_name, coro_stack, + "", "", "0x0"]) + continue + + # Handle tasks with awaiters + for coro_info in task_info.awaited_by: + parent_id = coro_info.task_name + awaiter_frames = [_format_stack_entry(x).split(" ")[0] + for x in coro_info.call_stack] + awaiter_chain = " -> ".join(awaiter_frames) + awaiter_name = id2name.get(parent_id, "Unknown") + parent_id_str = (hex(parent_id) if isinstance(parent_id, int) + else str(parent_id)) + + table.append([thread_id, hex(task_id), task_name, coro_stack, + awaiter_chain, awaiter_name, parent_id_str]) return table @@ -211,11 +239,11 @@ def display_awaited_by_tasks_table(pid: int) -> None: table = build_task_table(tasks) # Print the table in a simple tabular format print( - f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine chain':<50} {'awaiter name':<20} {'awaiter id':<15}" + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine stack':<50} {'awaiter chain':<50} {'awaiter name':<15} {'awaiter id':<15}" ) - print("-" * 135) + print("-" * 180) for row in table: - print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<20} {row[5]:<15}") + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<50} {row[5]:<15} {row[6]:<15}") def display_awaited_by_tasks_tree(pid: int) -> None: diff --git a/Lib/test/test_asyncio/test_tools.py b/Lib/test/test_asyncio/test_tools.py index ba36e759ccdd61..34e94830204cd8 100644 --- a/Lib/test/test_asyncio/test_tools.py +++ b/Lib/test/test_asyncio/test_tools.py @@ -2,6 +2,13 @@ from asyncio import tools +from collections import namedtuple + +FrameInfo = namedtuple('FrameInfo', ['funcname', 'filename', 'lineno']) +CoroInfo = namedtuple('CoroInfo', ['call_stack', 'task_name']) +TaskInfo = namedtuple('TaskInfo', ['task_id', 'task_name', 'coroutine_stack', 'awaited_by']) +AwaitedInfo = namedtuple('AwaitedInfo', ['thread_id', 'awaited_by']) + # mock output of get_all_awaited_by function. TEST_INPUTS_TREE = [ @@ -10,81 +17,151 @@ # different subtasks part of a TaskGroup (root1 and root2) which call # awaiter functions. ( - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "timer", - [ - [[("awaiter3", "/path/to/app.py", 130), - ("awaiter2", "/path/to/app.py", 120), - ("awaiter", "/path/to/app.py", 110)], 4], - [[("awaiterB3", "/path/to/app.py", 190), - ("awaiterB2", "/path/to/app.py", 180), - ("awaiterB", "/path/to/app.py", 170)], 5], - [[("awaiterB3", "/path/to/app.py", 190), - ("awaiterB2", "/path/to/app.py", 180), - ("awaiterB", "/path/to/app.py", 170)], 6], - [[("awaiter3", "/path/to/app.py", 130), - ("awaiter2", "/path/to/app.py", 120), - ("awaiter", "/path/to/app.py", 110)], 7], - ], - ), - ( - 8, - "root1", - [[["_aexit", "__aexit__", "main"], 2]], - ), - ( - 9, - "root2", - [[["_aexit", "__aexit__", "main"], 2]], - ), - ( - 4, - "child1_1", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 8, - ] - ], - ), - ( - 6, - "child2_1", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 8, - ] - ], - ), - ( - 7, - "child1_2", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 9, - ] - ], - ), - ( - 5, - "child2_2", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 9, - ] - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] ), - ], + TaskInfo( + task_id=3, + task_name="timer", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("awaiter3", "/path/to/app.py", 130), + FrameInfo("awaiter2", "/path/to/app.py", 120), + FrameInfo("awaiter", "/path/to/app.py", 110) + ], + task_name=4 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiterB3", "/path/to/app.py", 190), + FrameInfo("awaiterB2", "/path/to/app.py", 180), + FrameInfo("awaiterB", "/path/to/app.py", 170) + ], + task_name=5 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiterB3", "/path/to/app.py", 190), + FrameInfo("awaiterB2", "/path/to/app.py", 180), + FrameInfo("awaiterB", "/path/to/app.py", 170) + ], + task_name=6 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiter3", "/path/to/app.py", 130), + FrameInfo("awaiter2", "/path/to/app.py", 120), + FrameInfo("awaiter", "/path/to/app.py", 110) + ], + task_name=7 + ) + ] + ), + TaskInfo( + task_id=8, + task_name="root1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("main", "", 0) + ], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=9, + task_name="root2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("main", "", 0) + ], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="child1_1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=8 + ) + ] + ), + TaskInfo( + task_id=6, + task_name="child2_1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=8 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="child1_2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=9 + ) + ] + ), + TaskInfo( + task_id=5, + task_name="child2_2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=9 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ), ( [ @@ -130,26 +207,96 @@ [ # test case containing two roots ( - ( - 9, - [ - (5, "Task-5", []), - (6, "Task-6", [[["main2"], 5]]), - (7, "Task-7", [[["main2"], 5]]), - (8, "Task-8", [[["main2"], 5]]), - ], + AwaitedInfo( + thread_id=9, + awaited_by=[ + TaskInfo( + task_id=5, + task_name="Task-5", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=6, + task_name="Task-6", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="Task-7", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ), + TaskInfo( + task_id=8, + task_name="Task-8", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ) + ] ), - ( - 10, - [ - (1, "Task-1", []), - (2, "Task-2", [[["main"], 1]]), - (3, "Task-3", [[["main"], 1]]), - (4, "Task-4", [[["main"], 1]]), - ], + AwaitedInfo( + thread_id=10, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=2, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ), + TaskInfo( + task_id=3, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="Task-4", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ) + ] ), - (11, []), - (0, []), + AwaitedInfo(thread_id=11, awaited_by=[]), + AwaitedInfo(thread_id=0, awaited_by=[]) ), ( [ @@ -174,18 +321,63 @@ # test case containing two roots, one of them without subtasks ( [ - (1, [(2, "Task-5", [])]), - ( - 3, - [ - (4, "Task-1", []), - (5, "Task-2", [[["main"], 4]]), - (6, "Task-3", [[["main"], 4]]), - (7, "Task-4", [[["main"], 4]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-5", + coroutine_stack=[], + awaited_by=[] + ) + ] ), - (8, []), - (0, []), + AwaitedInfo( + thread_id=3, + awaited_by=[ + TaskInfo( + task_id=4, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=5, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ), + TaskInfo( + task_id=6, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="Task-4", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ) + ] + ), + AwaitedInfo(thread_id=8, awaited_by=[]), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ( @@ -208,19 +400,44 @@ # this test case contains a cycle: two tasks awaiting each other. ( [ - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "a", - [[["awaiter2"], 4], [["main"], 2]], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] ), - (4, "b", [[["awaiter"], 3]]), - ], + TaskInfo( + task_id=3, + task_name="a", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("awaiter2", "", 0)], + task_name=4 + ), + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="b", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("awaiter", "", 0)], + task_name=3 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ([[4, 3, 4]]), @@ -229,32 +446,85 @@ # this test case contains two cycles ( [ - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "A", - [[["nested", "nested", "task_b"], 4]], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] ), - ( - 4, - "B", - [ - [["nested", "nested", "task_c"], 5], - [["nested", "nested", "task_a"], 3], - ], + TaskInfo( + task_id=3, + task_name="A", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_b", "", 0) + ], + task_name=4 + ) + ] ), - (5, "C", [[["nested", "nested"], 6]]), - ( - 6, - "Task-2", - [[["nested", "nested", "task_b"], 4]], + TaskInfo( + task_id=4, + task_name="B", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_c", "", 0) + ], + task_name=5 + ), + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_a", "", 0) + ], + task_name=3 + ) + ] ), - ], + TaskInfo( + task_id=5, + task_name="C", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0) + ], + task_name=6 + ) + ] + ), + TaskInfo( + task_id=6, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_b", "", 0) + ], + task_name=4 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ([[4, 3, 4], [4, 6, 5, 4]]), @@ -267,81 +537,160 @@ # different subtasks part of a TaskGroup (root1 and root2) which call # awaiter functions. ( - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "timer", - [ - [["awaiter3", "awaiter2", "awaiter"], 4], - [["awaiter1_3", "awaiter1_2", "awaiter1"], 5], - [["awaiter1_3", "awaiter1_2", "awaiter1"], 6], - [["awaiter3", "awaiter2", "awaiter"], 7], - ], - ), - ( - 8, - "root1", - [[["_aexit", "__aexit__", "main"], 2]], - ), - ( - 9, - "root2", - [[["_aexit", "__aexit__", "main"], 2]], - ), - ( - 4, - "child1_1", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 8, - ] - ], - ), - ( - 6, - "child2_1", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 8, - ] - ], - ), - ( - 7, - "child1_2", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 9, - ] - ], - ), - ( - 5, - "child2_2", - [ - [ - ["_aexit", "__aexit__", "blocho_caller", "bloch"], - 9, - ] - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] ), - ], + TaskInfo( + task_id=3, + task_name="timer", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("awaiter3", "", 0), + FrameInfo("awaiter2", "", 0), + FrameInfo("awaiter", "", 0) + ], + task_name=4 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiter1_3", "", 0), + FrameInfo("awaiter1_2", "", 0), + FrameInfo("awaiter1", "", 0) + ], + task_name=5 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiter1_3", "", 0), + FrameInfo("awaiter1_2", "", 0), + FrameInfo("awaiter1", "", 0) + ], + task_name=6 + ), + CoroInfo( + call_stack=[ + FrameInfo("awaiter3", "", 0), + FrameInfo("awaiter2", "", 0), + FrameInfo("awaiter", "", 0) + ], + task_name=7 + ) + ] + ), + TaskInfo( + task_id=8, + task_name="root1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("main", "", 0) + ], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=9, + task_name="root2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("main", "", 0) + ], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="child1_1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=8 + ) + ] + ), + TaskInfo( + task_id=6, + task_name="child2_1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=8 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="child1_2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=9 + ) + ] + ), + TaskInfo( + task_id=5, + task_name="child2_2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("_aexit", "", 0), + FrameInfo("__aexit__", "", 0), + FrameInfo("blocho_caller", "", 0), + FrameInfo("bloch", "", 0) + ], + task_name=9 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ), ( [ - [1, "0x2", "Task-1", "", "", "0x0"], + [1, "0x2", "Task-1", "", "", "", "0x0"], [ 1, "0x3", "timer", + "", "awaiter3 -> awaiter2 -> awaiter", "child1_1", "0x4", @@ -350,6 +699,7 @@ 1, "0x3", "timer", + "", "awaiter1_3 -> awaiter1_2 -> awaiter1", "child2_2", "0x5", @@ -358,6 +708,7 @@ 1, "0x3", "timer", + "", "awaiter1_3 -> awaiter1_2 -> awaiter1", "child2_1", "0x6", @@ -366,6 +717,7 @@ 1, "0x3", "timer", + "", "awaiter3 -> awaiter2 -> awaiter", "child1_2", "0x7", @@ -374,6 +726,7 @@ 1, "0x8", "root1", + "", "_aexit -> __aexit__ -> main", "Task-1", "0x2", @@ -382,6 +735,7 @@ 1, "0x9", "root2", + "", "_aexit -> __aexit__ -> main", "Task-1", "0x2", @@ -390,6 +744,7 @@ 1, "0x4", "child1_1", + "", "_aexit -> __aexit__ -> blocho_caller -> bloch", "root1", "0x8", @@ -398,6 +753,7 @@ 1, "0x6", "child2_1", + "", "_aexit -> __aexit__ -> blocho_caller -> bloch", "root1", "0x8", @@ -406,6 +762,7 @@ 1, "0x7", "child1_2", + "", "_aexit -> __aexit__ -> blocho_caller -> bloch", "root2", "0x9", @@ -414,6 +771,7 @@ 1, "0x5", "child2_2", + "", "_aexit -> __aexit__ -> blocho_caller -> bloch", "root2", "0x9", @@ -424,37 +782,107 @@ [ # test case containing two roots ( - ( - 9, - [ - (5, "Task-5", []), - (6, "Task-6", [[["main2"], 5]]), - (7, "Task-7", [[["main2"], 5]]), - (8, "Task-8", [[["main2"], 5]]), - ], + AwaitedInfo( + thread_id=9, + awaited_by=[ + TaskInfo( + task_id=5, + task_name="Task-5", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=6, + task_name="Task-6", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="Task-7", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ), + TaskInfo( + task_id=8, + task_name="Task-8", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main2", "", 0)], + task_name=5 + ) + ] + ) + ] ), - ( - 10, - [ - (1, "Task-1", []), - (2, "Task-2", [[["main"], 1]]), - (3, "Task-3", [[["main"], 1]]), - (4, "Task-4", [[["main"], 1]]), - ], + AwaitedInfo( + thread_id=10, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=2, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ), + TaskInfo( + task_id=3, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="Task-4", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=1 + ) + ] + ) + ] ), - (11, []), - (0, []), + AwaitedInfo(thread_id=11, awaited_by=[]), + AwaitedInfo(thread_id=0, awaited_by=[]) ), ( [ - [9, "0x5", "Task-5", "", "", "0x0"], - [9, "0x6", "Task-6", "main2", "Task-5", "0x5"], - [9, "0x7", "Task-7", "main2", "Task-5", "0x5"], - [9, "0x8", "Task-8", "main2", "Task-5", "0x5"], - [10, "0x1", "Task-1", "", "", "0x0"], - [10, "0x2", "Task-2", "main", "Task-1", "0x1"], - [10, "0x3", "Task-3", "main", "Task-1", "0x1"], - [10, "0x4", "Task-4", "main", "Task-1", "0x1"], + [9, "0x5", "Task-5", "", "", "", "0x0"], + [9, "0x6", "Task-6", "", "main2", "Task-5", "0x5"], + [9, "0x7", "Task-7", "", "main2", "Task-5", "0x5"], + [9, "0x8", "Task-8", "", "main2", "Task-5", "0x5"], + [10, "0x1", "Task-1", "", "", "", "0x0"], + [10, "0x2", "Task-2", "", "main", "Task-1", "0x1"], + [10, "0x3", "Task-3", "", "main", "Task-1", "0x1"], + [10, "0x4", "Task-4", "", "main", "Task-1", "0x1"], ] ), ], @@ -462,27 +890,72 @@ # test case containing two roots, one of them without subtasks ( [ - (1, [(2, "Task-5", [])]), - ( - 3, - [ - (4, "Task-1", []), - (5, "Task-2", [[["main"], 4]]), - (6, "Task-3", [[["main"], 4]]), - (7, "Task-4", [[["main"], 4]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-5", + coroutine_stack=[], + awaited_by=[] + ) + ] ), - (8, []), - (0, []), + AwaitedInfo( + thread_id=3, + awaited_by=[ + TaskInfo( + task_id=4, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=5, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ), + TaskInfo( + task_id=6, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ), + TaskInfo( + task_id=7, + task_name="Task-4", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=4 + ) + ] + ) + ] + ), + AwaitedInfo(thread_id=8, awaited_by=[]), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ( [ - [1, "0x2", "Task-5", "", "", "0x0"], - [3, "0x4", "Task-1", "", "", "0x0"], - [3, "0x5", "Task-2", "main", "Task-1", "0x4"], - [3, "0x6", "Task-3", "main", "Task-1", "0x4"], - [3, "0x7", "Task-4", "main", "Task-1", "0x4"], + [1, "0x2", "Task-5", "", "", "", "0x0"], + [3, "0x4", "Task-1", "", "", "", "0x0"], + [3, "0x5", "Task-2", "", "main", "Task-1", "0x4"], + [3, "0x6", "Task-3", "", "main", "Task-1", "0x4"], + [3, "0x7", "Task-4", "", "main", "Task-1", "0x4"], ] ), ], @@ -491,27 +964,52 @@ # this test case contains a cycle: two tasks awaiting each other. ( [ - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "a", - [[["awaiter2"], 4], [["main"], 2]], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] ), - (4, "b", [[["awaiter"], 3]]), - ], + TaskInfo( + task_id=3, + task_name="a", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("awaiter2", "", 0)], + task_name=4 + ), + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="b", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("awaiter", "", 0)], + task_name=3 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ( [ - [1, "0x2", "Task-1", "", "", "0x0"], - [1, "0x3", "a", "awaiter2", "b", "0x4"], - [1, "0x3", "a", "main", "Task-1", "0x2"], - [1, "0x4", "b", "awaiter", "a", "0x3"], + [1, "0x2", "Task-1", "", "", "", "0x0"], + [1, "0x3", "a", "", "awaiter2", "b", "0x4"], + [1, "0x3", "a", "", "main", "Task-1", "0x2"], + [1, "0x4", "b", "", "awaiter", "a", "0x3"], ] ), ], @@ -519,41 +1017,95 @@ # this test case contains two cycles ( [ - ( - 1, - [ - (2, "Task-1", []), - ( - 3, - "A", - [[["nested", "nested", "task_b"], 4]], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=3, + task_name="A", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_b", "", 0) + ], + task_name=4 + ) + ] ), - ( - 4, - "B", - [ - [["nested", "nested", "task_c"], 5], - [["nested", "nested", "task_a"], 3], - ], + TaskInfo( + task_id=4, + task_name="B", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_c", "", 0) + ], + task_name=5 + ), + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_a", "", 0) + ], + task_name=3 + ) + ] ), - (5, "C", [[["nested", "nested"], 6]]), - ( - 6, - "Task-2", - [[["nested", "nested", "task_b"], 4]], + TaskInfo( + task_id=5, + task_name="C", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0) + ], + task_name=6 + ) + ] ), - ], + TaskInfo( + task_id=6, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("nested", "", 0), + FrameInfo("nested", "", 0), + FrameInfo("task_b", "", 0) + ], + task_name=4 + ) + ] + ) + ] ), - (0, []), + AwaitedInfo(thread_id=0, awaited_by=[]) ] ), ( [ - [1, "0x2", "Task-1", "", "", "0x0"], + [1, "0x2", "Task-1", "", "", "", "0x0"], [ 1, "0x3", "A", + "", "nested -> nested -> task_b", "B", "0x4", @@ -562,6 +1114,7 @@ 1, "0x4", "B", + "", "nested -> nested -> task_c", "C", "0x5", @@ -570,6 +1123,7 @@ 1, "0x4", "B", + "", "nested -> nested -> task_a", "A", "0x3", @@ -578,6 +1132,7 @@ 1, "0x5", "C", + "", "nested -> nested", "Task-2", "0x6", @@ -586,6 +1141,7 @@ 1, "0x6", "Task-2", + "", "nested -> nested -> task_b", "B", "0x4", @@ -600,7 +1156,8 @@ class TestAsyncioToolsTree(unittest.TestCase): def test_asyncio_utils(self): for input_, tree in TEST_INPUTS_TREE: with self.subTest(input_): - self.assertEqual(tools.build_async_tree(input_), tree) + result = tools.build_async_tree(input_) + self.assertEqual(result, tree) def test_asyncio_utils_cycles(self): for input_, cycles in TEST_INPUTS_CYCLES_TREE: @@ -615,7 +1172,8 @@ class TestAsyncioToolsTable(unittest.TestCase): def test_asyncio_utils(self): for input_, table in TEST_INPUTS_TABLE: with self.subTest(input_): - self.assertEqual(tools.build_task_table(input_), table) + result = tools.build_task_table(input_) + self.assertEqual(result, table) class TestAsyncioToolsBasic(unittest.TestCase): @@ -632,26 +1190,67 @@ def test_empty_input_table(self): self.assertEqual(tools.build_task_table(result), expected_output) def test_only_independent_tasks_tree(self): - input_ = [(1, [(10, "taskA", []), (11, "taskB", [])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=10, + task_name="taskA", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=11, + task_name="taskB", + coroutine_stack=[], + awaited_by=[] + ) + ] + ) + ] expected = [["└── (T) taskA"], ["└── (T) taskB"]] result = tools.build_async_tree(input_) self.assertEqual(sorted(result), sorted(expected)) def test_only_independent_tasks_table(self): - input_ = [(1, [(10, "taskA", []), (11, "taskB", [])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=10, + task_name="taskA", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=11, + task_name="taskB", + coroutine_stack=[], + awaited_by=[] + ) + ] + ) + ] self.assertEqual( tools.build_task_table(input_), - [[1, "0xa", "taskA", "", "", "0x0"], [1, "0xb", "taskB", "", "", "0x0"]], + [[1, '0xa', 'taskA', '', '', '', '0x0'], [1, '0xb', 'taskB', '', '', '', '0x0']] ) def test_single_task_tree(self): """Test build_async_tree with a single task and no awaits.""" result = [ - ( - 1, - [ - (2, "Task-1", []), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ) + ] ) ] expected_output = [ @@ -664,25 +1263,50 @@ def test_single_task_tree(self): def test_single_task_table(self): """Test build_task_table with a single task and no awaits.""" result = [ - ( - 1, - [ - (2, "Task-1", []), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ) + ] ) ] - expected_output = [[1, "0x2", "Task-1", "", "", "0x0"]] + expected_output = [[1, '0x2', 'Task-1', '', '', '', '0x0']] self.assertEqual(tools.build_task_table(result), expected_output) def test_cycle_detection(self): """Test build_async_tree raises CycleFoundException for cyclic input.""" result = [ - ( - 1, - [ - (2, "Task-1", [[["main"], 3]]), - (3, "Task-2", [[["main"], 2]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=3 + ) + ] + ), + TaskInfo( + task_id=3, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=2 + ) + ] + ) + ] ) ] with self.assertRaises(tools.CycleFoundException) as context: @@ -692,13 +1316,38 @@ def test_cycle_detection(self): def test_complex_tree(self): """Test build_async_tree with a more complex tree structure.""" result = [ - ( - 1, - [ - (2, "Task-1", []), - (3, "Task-2", [[["main"], 2]]), - (4, "Task-3", [[["main"], 3]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=3, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=3 + ) + ] + ) + ] ) ] expected_output = [ @@ -715,30 +1364,76 @@ def test_complex_tree(self): def test_complex_table(self): """Test build_task_table with a more complex tree structure.""" result = [ - ( - 1, - [ - (2, "Task-1", []), - (3, "Task-2", [[["main"], 2]]), - (4, "Task-3", [[["main"], 3]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=2, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=3, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=4, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("main", "", 0)], + task_name=3 + ) + ] + ) + ] ) ] expected_output = [ - [1, "0x2", "Task-1", "", "", "0x0"], - [1, "0x3", "Task-2", "main", "Task-1", "0x2"], - [1, "0x4", "Task-3", "main", "Task-2", "0x3"], + [1, '0x2', 'Task-1', '', '', '', '0x0'], + [1, '0x3', 'Task-2', '', 'main', 'Task-1', '0x2'], + [1, '0x4', 'Task-3', '', 'main', 'Task-2', '0x3'] ] self.assertEqual(tools.build_task_table(result), expected_output) def test_deep_coroutine_chain(self): input_ = [ - ( - 1, - [ - (10, "leaf", [[["c1", "c2", "c3", "c4", "c5"], 11]]), - (11, "root", []), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=10, + task_name="leaf", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("c1", "", 0), + FrameInfo("c2", "", 0), + FrameInfo("c3", "", 0), + FrameInfo("c4", "", 0), + FrameInfo("c5", "", 0) + ], + task_name=11 + ) + ] + ), + TaskInfo( + task_id=11, + task_name="root", + coroutine_stack=[], + awaited_by=[] + ) + ] ) ] expected = [ @@ -757,13 +1452,47 @@ def test_deep_coroutine_chain(self): def test_multiple_cycles_same_node(self): input_ = [ - ( - 1, - [ - (1, "Task-A", [[["call1"], 2]]), - (2, "Task-B", [[["call2"], 3]]), - (3, "Task-C", [[["call3"], 1], [["call4"], 2]]), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-A", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("call1", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=2, + task_name="Task-B", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("call2", "", 0)], + task_name=3 + ) + ] + ), + TaskInfo( + task_id=3, + task_name="Task-C", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("call3", "", 0)], + task_name=1 + ), + CoroInfo( + call_stack=[FrameInfo("call4", "", 0)], + task_name=2 + ) + ] + ) + ] ) ] with self.assertRaises(tools.CycleFoundException) as ctx: @@ -772,19 +1501,43 @@ def test_multiple_cycles_same_node(self): self.assertTrue(any(set(c) == {1, 2, 3} for c in cycles)) def test_table_output_format(self): - input_ = [(1, [(1, "Task-A", [[["foo"], 2]]), (2, "Task-B", [])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-A", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("foo", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=2, + task_name="Task-B", + coroutine_stack=[], + awaited_by=[] + ) + ] + ) + ] table = tools.build_task_table(input_) for row in table: - self.assertEqual(len(row), 6) + self.assertEqual(len(row), 7) self.assertIsInstance(row[0], int) # thread ID self.assertTrue( isinstance(row[1], str) and row[1].startswith("0x") ) # hex task ID self.assertIsInstance(row[2], str) # task name - self.assertIsInstance(row[3], str) # coroutine chain - self.assertIsInstance(row[4], str) # awaiter name + self.assertIsInstance(row[3], str) # coroutine stack + self.assertIsInstance(row[4], str) # coroutine chain + self.assertIsInstance(row[5], str) # awaiter name self.assertTrue( - isinstance(row[5], str) and row[5].startswith("0x") + isinstance(row[6], str) and row[6].startswith("0x") ) # hex awaiter ID @@ -792,28 +1545,86 @@ class TestAsyncioToolsEdgeCases(unittest.TestCase): def test_task_awaits_self(self): """A task directly awaits itself - should raise a cycle.""" - input_ = [(1, [(1, "Self-Awaiter", [[["loopback"], 1]])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Self-Awaiter", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("loopback", "", 0)], + task_name=1 + ) + ] + ) + ] + ) + ] with self.assertRaises(tools.CycleFoundException) as ctx: tools.build_async_tree(input_) self.assertIn([1, 1], ctx.exception.cycles) def test_task_with_missing_awaiter_id(self): """Awaiter ID not in task list - should not crash, just show 'Unknown'.""" - input_ = [(1, [(1, "Task-A", [[["coro"], 999]])])] # 999 not defined + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-A", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("coro", "", 0)], + task_name=999 + ) + ] + ) + ] + ) + ] table = tools.build_task_table(input_) self.assertEqual(len(table), 1) - self.assertEqual(table[0][4], "Unknown") + self.assertEqual(table[0][5], "Unknown") def test_duplicate_coroutine_frames(self): """Same coroutine frame repeated under a parent - should deduplicate.""" input_ = [ - ( - 1, - [ - (1, "Task-1", [[["frameA"], 2], [["frameA"], 3]]), - (2, "Task-2", []), - (3, "Task-3", []), - ], + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="Task-1", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("frameA", "", 0)], + task_name=2 + ), + CoroInfo( + call_stack=[FrameInfo("frameA", "", 0)], + task_name=3 + ) + ] + ), + TaskInfo( + task_id=2, + task_name="Task-2", + coroutine_stack=[], + awaited_by=[] + ), + TaskInfo( + task_id=3, + task_name="Task-3", + coroutine_stack=[], + awaited_by=[] + ) + ] ) ] tree = tools.build_async_tree(input_) @@ -830,14 +1641,63 @@ def test_duplicate_coroutine_frames(self): def test_task_with_no_name(self): """Task with no name in id2name - should still render with fallback.""" - input_ = [(1, [(1, "root", [[["f1"], 2]]), (2, None, [])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="root", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[FrameInfo("f1", "", 0)], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=2, + task_name=None, + coroutine_stack=[], + awaited_by=[] + ) + ] + ) + ] # If name is None, fallback to string should not crash tree = tools.build_async_tree(input_) self.assertIn("(T) None", "\n".join(tree[0])) def test_tree_rendering_with_custom_emojis(self): """Pass custom emojis to the tree renderer.""" - input_ = [(1, [(1, "MainTask", [[["f1", "f2"], 2]]), (2, "SubTask", [])])] + input_ = [ + AwaitedInfo( + thread_id=1, + awaited_by=[ + TaskInfo( + task_id=1, + task_name="MainTask", + coroutine_stack=[], + awaited_by=[ + CoroInfo( + call_stack=[ + FrameInfo("f1", "", 0), + FrameInfo("f2", "", 0) + ], + task_name=2 + ) + ] + ), + TaskInfo( + task_id=2, + task_name="SubTask", + coroutine_stack=[], + awaited_by=[] + ) + ] + ) + ] tree = tools.build_async_tree(input_, task_emoji="🧵", cor_emoji="🔁") flat = "\n".join(tree[0]) self.assertIn("🧵 MainTask", flat) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 303af25fc7a715..90214e814f2b35 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -5,7 +5,7 @@ import sys import socket import threading -from asyncio import staggered, taskgroups +from asyncio import staggered, taskgroups, base_events, tasks from unittest.mock import ANY from test.support import os_helper, SHORT_TIMEOUT, busy_retry from test.support.script_helper import make_script @@ -18,8 +18,12 @@ try: from _remote_debugging import PROCESS_VM_READV_SUPPORTED from _remote_debugging import RemoteUnwinder + from _remote_debugging import FrameInfo, CoroInfo, TaskInfo except ImportError: - raise unittest.SkipTest("Test only runs when _remote_debugging is available") + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + def _make_test_script(script_dir, script_basename, source): to_return = make_script(script_dir, script_basename, source) @@ -28,7 +32,11 @@ def _make_test_script(script_dir, script_basename, source): skip_if_not_supported = unittest.skipIf( - (sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32"), + ( + sys.platform != "darwin" + and sys.platform != "linux" + and sys.platform != "win32" + ), "Test only runs on Linux, Windows and MacOS", ) @@ -101,11 +109,16 @@ def foo(): client_socket, _ = server_socket.accept() server_socket.close() response = b"" - while b"ready:main" not in response or b"ready:thread" not in response: + while ( + b"ready:main" not in response + or b"ready:thread" not in response + ): response += client_socket.recv(1024) stack_trace = get_stack_trace(p.pid) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -114,17 +127,17 @@ def foo(): p.wait(timeout=SHORT_TIMEOUT) thread_expected_stack_trace = [ - (script_name, 15, "foo"), - (script_name, 12, "baz"), - (script_name, 9, "bar"), - (threading.__file__, ANY, 'Thread.run') + FrameInfo([script_name, 15, "foo"]), + FrameInfo([script_name, 12, "baz"]), + FrameInfo([script_name, 9, "bar"]), + FrameInfo([threading.__file__, ANY, "Thread.run"]), ] # Is possible that there are more threads, so we check that the # expected stack traces are in the result (looking at you Windows!) self.assertIn((ANY, thread_expected_stack_trace), stack_trace) # Check that the main thread stack trace is in the result - frame = (script_name, 19, "") + frame = FrameInfo([script_name, 19, ""]) for _, stack in stack_trace: if frame in stack: break @@ -189,8 +202,12 @@ def new_eager_loop(): ): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket = socket.socket( + socket.AF_INET, socket.SOCK_STREAM + ) + server_socket.setsockopt( + socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 + ) server_socket.bind(("localhost", port)) server_socket.settimeout(SHORT_TIMEOUT) server_socket.listen(1) @@ -208,7 +225,9 @@ def new_eager_loop(): self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -219,79 +238,49 @@ def new_eager_loop(): # sets are unordered, so we want to sort "awaited_by"s stack_trace[2].sort(key=lambda x: x[1]) - root_task = "Task-1" expected_stack_trace = [ [ - (script_name, 10, "c5"), - (script_name, 14, "c4"), - (script_name, 17, "c3"), - (script_name, 20, "c2"), + FrameInfo([script_name, 10, "c5"]), + FrameInfo([script_name, 14, "c4"]), + FrameInfo([script_name, 17, "c3"]), + FrameInfo([script_name, 20, "c2"]), ], "c2_root", [ - [ - [ - ( - taskgroups.__file__, - ANY, - "TaskGroup._aexit" - ), - ( - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__" - ), - (script_name, 26, "main"), - ], - "Task-1", - [], - ], - [ - [(script_name, 23, "c1")], - "sub_main_1", + CoroInfo( [ [ - [ - ( + FrameInfo( + [ taskgroups.__file__, ANY, - "TaskGroup._aexit" - ), - ( + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ taskgroups.__file__, ANY, - "TaskGroup.__aexit__" - ), - (script_name, 26, "main"), - ], - "Task-1", - [], - ] - ], - ], - [ - [(script_name, 23, "c1")], - "sub_main_2", + "TaskGroup.__aexit__", + ] + ), + FrameInfo([script_name, 26, "main"]), + ], + "Task-1", + ] + ), + CoroInfo( [ - [ - [ - ( - taskgroups.__file__, - ANY, - "TaskGroup._aexit" - ), - ( - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__" - ), - (script_name, 26, "main"), - ], - "Task-1", - [], - ] - ], - ], + [FrameInfo([script_name, 23, "c1"])], + "sub_main_1", + ] + ), + CoroInfo( + [ + [FrameInfo([script_name, 23, "c1"])], + "sub_main_2", + ] + ), ], ] self.assertEqual(stack_trace, expected_stack_trace) @@ -350,7 +339,9 @@ async def main(): self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -363,9 +354,9 @@ async def main(): expected_stack_trace = [ [ - (script_name, 10, "gen_nested_call"), - (script_name, 16, "gen"), - (script_name, 19, "main"), + FrameInfo([script_name, 10, "gen_nested_call"]), + FrameInfo([script_name, 16, "gen"]), + FrameInfo([script_name, 19, "main"]), ], "Task-1", [], @@ -427,7 +418,9 @@ async def main(): self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -439,9 +432,12 @@ async def main(): stack_trace[2].sort(key=lambda x: x[1]) expected_stack_trace = [ - [(script_name, 11, "deep"), (script_name, 15, "c1")], + [ + FrameInfo([script_name, 11, "deep"]), + FrameInfo([script_name, 15, "c1"]), + ], "Task-2", - [[[(script_name, 21, "main")], "Task-1", []]], + [CoroInfo([[FrameInfo([script_name, 21, "main"])], "Task-1"])], ] self.assertEqual(stack_trace, expected_stack_trace) @@ -503,7 +499,9 @@ async def main(): self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -515,20 +513,29 @@ async def main(): stack_trace[2].sort(key=lambda x: x[1]) expected_stack_trace = [ [ - (script_name, 11, "deep"), - (script_name, 15, "c1"), - (staggered.__file__, ANY, "staggered_race..run_one_coro"), + FrameInfo([script_name, 11, "deep"]), + FrameInfo([script_name, 15, "c1"]), + FrameInfo( + [ + staggered.__file__, + ANY, + "staggered_race..run_one_coro", + ] + ), ], "Task-2", [ - [ + CoroInfo( [ - (staggered.__file__, ANY, "staggered_race"), - (script_name, 21, "main"), - ], - "Task-1", - [], - ] + [ + FrameInfo( + [staggered.__file__, ANY, "staggered_race"] + ), + FrameInfo([script_name, 21, "main"]), + ], + "Task-1", + ] + ) ], ] self.assertEqual(stack_trace, expected_stack_trace) @@ -659,62 +666,174 @@ async def main(): # expected: at least 1000 pending tasks self.assertGreaterEqual(len(entries), 1000) # the first three tasks stem from the code structure - self.assertIn((ANY, "Task-1", []), entries) main_stack = [ - ( - taskgroups.__file__, - ANY, - "TaskGroup._aexit", + FrameInfo([taskgroups.__file__, ANY, "TaskGroup._aexit"]), + FrameInfo( + [taskgroups.__file__, ANY, "TaskGroup.__aexit__"] ), - ( - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ), - (script_name, 60, "main"), + FrameInfo([script_name, 60, "main"]), ] self.assertIn( - (ANY, "server task", [[main_stack, ANY]]), + TaskInfo( + [ANY, "Task-1", [CoroInfo([main_stack, ANY])], []] + ), entries, ) self.assertIn( - (ANY, "echo client spam", [[main_stack, ANY]]), + TaskInfo( + [ + ANY, + "server task", + [ + CoroInfo( + [ + [ + FrameInfo( + [ + base_events.__file__, + ANY, + "Server.serve_forever", + ] + ) + ], + ANY, + ] + ) + ], + [ + CoroInfo( + [ + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [script_name, ANY, "main"] + ), + ], + ANY, + ] + ) + ], + ] + ), + entries, + ) + self.assertIn( + TaskInfo( + [ + ANY, + "Task-4", + [ + CoroInfo( + [ + [ + FrameInfo( + [tasks.__file__, ANY, "sleep"] + ), + FrameInfo( + [ + script_name, + 38, + "echo_client", + ] + ), + ], + ANY, + ] + ) + ], + [ + CoroInfo( + [ + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [ + script_name, + 41, + "echo_client_spam", + ] + ), + ], + ANY, + ] + ) + ], + ] + ), entries, ) - expected_stack = [ - [ + expected_awaited_by = [ + CoroInfo( [ - ( - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ), - ( - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ), - (script_name, 41, "echo_client_spam"), - ], - ANY, - ] + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [script_name, 41, "echo_client_spam"] + ), + ], + ANY, + ] + ) ] - tasks_with_stack = [ - task for task in entries if task[2] == expected_stack + tasks_with_awaited = [ + task + for task in entries + if task.awaited_by == expected_awaited_by ] - self.assertGreaterEqual(len(tasks_with_stack), 1000) + self.assertGreaterEqual(len(tasks_with_awaited), 1000) # the final task will have some random number, but it should for # sure be one of the echo client spam horde (In windows this is not true # for some reason) if sys.platform != "win32": self.assertEqual( - expected_stack, - entries[-1][2], + tasks_with_awaited[-1].awaited_by, + entries[-1].awaited_by, ) except PermissionError: - self.skipTest("Insufficient permissions to read the stack trace") + self.skipTest( + "Insufficient permissions to read the stack trace" + ) finally: if client_socket is not None: client_socket.close() @@ -740,17 +859,21 @@ def test_self_trace(self): self.assertEqual( stack[:2], [ - ( - __file__, - get_stack_trace.__code__.co_firstlineno + 2, - "get_stack_trace", + FrameInfo( + [ + __file__, + get_stack_trace.__code__.co_firstlineno + 2, + "get_stack_trace", + ] ), - ( - __file__, - self.test_self_trace.__code__.co_firstlineno + 6, - "TestGetStackTrace.test_self_trace", + FrameInfo( + [ + __file__, + self.test_self_trace.__code__.co_firstlineno + 6, + "TestGetStackTrace.test_self_trace", + ] ), - ] + ], ) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-18-12-42.gh-issue-135371.R_YUtR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-18-12-42.gh-issue-135371.R_YUtR.rst new file mode 100644 index 00000000000000..9f2e825e57bb2a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-18-12-42.gh-issue-135371.R_YUtR.rst @@ -0,0 +1,4 @@ +Fixed :mod:`asyncio` debugging tools to properly display internal coroutine +call stacks alongside external task dependencies. The ``python -m asyncio +ps`` and ``python -m asyncio pstree`` commands now show complete execution +context. Patch by Pablo Galindo. diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index ea58f38006e199..19f12c3b02e5a4 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -97,6 +97,101 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; +/* ============================================================================ + * STRUCTSEQ TYPE DEFINITIONS + * ============================================================================ */ + +// TaskInfo structseq type - replaces 4-tuple (task_id, task_name, coroutine_stack, awaited_by) +static PyStructSequence_Field TaskInfo_fields[] = { + {"task_id", "Task ID (memory address)"}, + {"task_name", "Task name"}, + {"coroutine_stack", "Coroutine call stack"}, + {"awaited_by", "Tasks awaiting this task"}, + {NULL} +}; + +static PyStructSequence_Desc TaskInfo_desc = { + "_remote_debugging.TaskInfo", + "Information about an asyncio task", + TaskInfo_fields, + 4 +}; + +// FrameInfo structseq type - replaces 3-tuple (filename, lineno, funcname) +static PyStructSequence_Field FrameInfo_fields[] = { + {"filename", "Source code filename"}, + {"lineno", "Line number"}, + {"funcname", "Function name"}, + {NULL} +}; + +static PyStructSequence_Desc FrameInfo_desc = { + "_remote_debugging.FrameInfo", + "Information about a frame", + FrameInfo_fields, + 3 +}; + +// CoroInfo structseq type - replaces 2-tuple (call_stack, task_name) +static PyStructSequence_Field CoroInfo_fields[] = { + {"call_stack", "Coroutine call stack"}, + {"task_name", "Task name"}, + {NULL} +}; + +static PyStructSequence_Desc CoroInfo_desc = { + "_remote_debugging.CoroInfo", + "Information about a coroutine", + CoroInfo_fields, + 2 +}; + +// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) +static PyStructSequence_Field ThreadInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"frame_info", "Frame information"}, + {NULL} +}; + +static PyStructSequence_Desc ThreadInfo_desc = { + "_remote_debugging.ThreadInfo", + "Information about a thread", + ThreadInfo_fields, + 2 +}; + +// AwaitedInfo structseq type - replaces 2-tuple (tid, awaited_by_list) +static PyStructSequence_Field AwaitedInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"awaited_by", "List of tasks awaited by this thread"}, + {NULL} +}; + +static PyStructSequence_Desc AwaitedInfo_desc = { + "_remote_debugging.AwaitedInfo", + "Information about what a thread is awaiting", + AwaitedInfo_fields, + 2 +}; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +typedef struct { + /* Types */ + PyTypeObject *RemoteDebugging_Type; + PyTypeObject *TaskInfo_Type; + PyTypeObject *FrameInfo_Type; + PyTypeObject *CoroInfo_Type; + PyTypeObject *ThreadInfo_Type; + PyTypeObject *AwaitedInfo_Type; +} RemoteDebuggingState; + typedef struct { PyObject_HEAD proc_handle_t handle; @@ -109,6 +204,7 @@ typedef struct { uint64_t code_object_generation; _Py_hashtable_t *code_object_cache; int debug; + RemoteDebuggingState *cached_state; // Cached module state #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking uint32_t tlbc_generation; // Track TLBC index pool changes @@ -116,19 +212,6 @@ typedef struct { #endif } RemoteUnwinderObject; -typedef struct { - PyObject *func_name; - PyObject *file_name; - int first_lineno; - PyObject *linetable; // bytes - uintptr_t addr_code_adaptive; -} CachedCodeMetadata; - -typedef struct { - /* Types */ - PyTypeObject *RemoteDebugging_Type; -} RemoteDebuggingState; - typedef struct { int lineno; @@ -218,6 +301,24 @@ RemoteDebugging_GetState(PyObject *module) return (RemoteDebuggingState *)state; } +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromType(PyTypeObject *type) +{ + PyObject *module = PyType_GetModule(type); + assert(module != NULL); + return RemoteDebugging_GetState(module); +} + +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromObject(PyObject *obj) +{ + RemoteUnwinderObject *unwinder = (RemoteUnwinderObject *)obj; + if (unwinder->cached_state == NULL) { + unwinder->cached_state = RemoteDebugging_GetStateFromType(Py_TYPE(obj)); + } + return unwinder->cached_state; +} + static inline int RemoteDebugging_InitState(RemoteDebuggingState *st) { @@ -854,24 +955,14 @@ create_task_result( char task_obj[SIZEOF_TASK_OBJ]; uintptr_t coro_addr; - result = PyList_New(0); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task result list"); - goto error; - } - + // Create call_stack first since it's the first tuple element call_stack = PyList_New(0); if (call_stack == NULL) { set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); goto error; } - if (PyList_Append(result, call_stack)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append call stack to task result"); - goto error; - } - Py_CLEAR(call_stack); - + // Create task name/address for second tuple element if (recurse_task) { tn = parse_task_name(unwinder, task_address); } else { @@ -882,12 +973,6 @@ create_task_result( goto error; } - if (PyList_Append(result, tn)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task name to result"); - goto error; - } - Py_CLEAR(tn); - // Parse coroutine chain if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, unwinder->async_debug_offsets.asyncio_task_object.size, @@ -900,31 +985,29 @@ create_task_result( coro_addr &= ~Py_TAG_BITS; if ((void*)coro_addr != NULL) { - call_stack = PyList_New(0); - if (call_stack == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coro call stack list"); - goto error; - } - if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { - Py_DECREF(call_stack); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); goto error; } if (PyList_Reverse(call_stack)) { - Py_DECREF(call_stack); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); goto error; } + } - if (PyList_SetItem(result, 0, call_stack) < 0) { - Py_DECREF(call_stack); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to set call stack in result"); - goto error; - } + // Create final CoroInfo result + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create CoroInfo"); + goto error; } + // PyStructSequence_SetItem steals references, so we don't need to DECREF on success + PyStructSequence_SetItem(result, 0, call_stack); // This steals the reference + PyStructSequence_SetItem(result, 1, tn); // This steals the reference + return result; error: @@ -943,7 +1026,6 @@ parse_task( ) { char is_task; PyObject* result = NULL; - PyObject* awaited_by = NULL; int err; err = read_char( @@ -962,48 +1044,37 @@ parse_task( goto error; } } else { - result = PyList_New(0); + // Create an empty CoroInfo for non-task objects + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty task result"); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty CoroInfo"); goto error; } - } - - if (PyList_Append(render_to, result)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); - goto error; - } - - if (recurse_task) { - awaited_by = PyList_New(0); - if (awaited_by == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list"); + PyObject *empty_list = PyList_New(0); + if (empty_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty list"); goto error; } - - if (PyList_Append(result, awaited_by)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by to result"); + PyObject *task_name = PyLong_FromUnsignedLongLong(task_address); + if (task_name == NULL) { + Py_DECREF(empty_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name"); goto error; } - Py_DECREF(awaited_by); + PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference + PyStructSequence_SetItem(result, 1, task_name); // This steals the reference + } - /* awaited_by is borrowed from 'result' to simplify cleanup */ - if (parse_task_awaited_by(unwinder, task_address, awaited_by, 1) < 0) { - // Clear the pointer so the cleanup doesn't try to decref it since - // it's borrowed from 'result' and will be decrefed when result is - // deleted. - awaited_by = NULL; - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task awaited_by relationships"); - goto error; - } + if (PyList_Append(render_to, result)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); + goto error; } Py_DECREF(result); - return 0; error: Py_XDECREF(result); - Py_XDECREF(awaited_by); return -1; } @@ -1161,6 +1232,7 @@ process_single_task_node( PyObject *current_awaited_by = NULL; PyObject *task_id = NULL; PyObject *result_item = NULL; + PyObject *coroutine_stack = NULL; tn = parse_task_name(unwinder, task_addr); if (tn == NULL) { @@ -1174,25 +1246,40 @@ process_single_task_node( goto error; } + // Extract the coroutine stack for this task + coroutine_stack = PyList_New(0); + if (coroutine_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); + goto error; + } + + if (parse_task(unwinder, task_addr, coroutine_stack, 0) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); + goto error; + } + task_id = PyLong_FromUnsignedLongLong(task_addr); if (task_id == NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); goto error; } - result_item = PyTuple_New(3); + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result_item = PyStructSequence_New(state->TaskInfo_Type); if (result_item == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create result tuple in single task node"); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); goto error; } - PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref - PyTuple_SET_ITEM(result_item, 1, tn); // steals ref - PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref + PyStructSequence_SetItem(result_item, 0, task_id); // steals ref + PyStructSequence_SetItem(result_item, 1, tn); // steals ref + PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref + PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref // References transferred to tuple task_id = NULL; tn = NULL; + coroutine_stack = NULL; current_awaited_by = NULL; if (PyList_Append(result, result_item)) { @@ -1203,9 +1290,11 @@ process_single_task_node( Py_DECREF(result_item); // Get back current_awaited_by reference for parse_task_awaited_by - current_awaited_by = PyTuple_GET_ITEM(result_item, 2); + current_awaited_by = PyStructSequence_GetItem(result_item, 3); if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); + // No cleanup needed here since all references were transferred to result_item + // and result_item was already added to result list and decreffed return -1; } @@ -1216,6 +1305,7 @@ process_single_task_node( Py_XDECREF(current_awaited_by); Py_XDECREF(task_id); Py_XDECREF(result_item); + Py_XDECREF(coroutine_stack); return -1; } @@ -1554,17 +1644,18 @@ parse_code_object(RemoteUnwinderObject *unwinder, goto error; } - tuple = PyTuple_New(3); + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + tuple = PyStructSequence_New(state->FrameInfo_Type); if (!tuple) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create result tuple for code object"); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object"); goto error; } Py_INCREF(meta->func_name); Py_INCREF(meta->file_name); - PyTuple_SET_ITEM(tuple, 0, meta->file_name); - PyTuple_SET_ITEM(tuple, 1, lineno); - PyTuple_SET_ITEM(tuple, 2, meta->func_name); + PyStructSequence_SetItem(tuple, 0, meta->file_name); + PyStructSequence_SetItem(tuple, 1, lineno); + PyStructSequence_SetItem(tuple, 2, meta->func_name); *result = tuple; return 0; @@ -2212,23 +2303,24 @@ append_awaited_by( return -1; } - PyObject *result_item = PyTuple_New(2); - if (result_item == NULL) { + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { Py_DECREF(tid_py); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by result tuple"); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); return -1; } - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *result_item = PyStructSequence_New(state->AwaitedInfo_Type); + if (result_item == NULL) { Py_DECREF(tid_py); - Py_DECREF(result_item); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); + Py_DECREF(awaited_by_for_thread); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); return -1; } - PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref - PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref + PyStructSequence_SetItem(result_item, 0, tid_py); // steals ref + PyStructSequence_SetItem(result_item, 1, awaited_by_for_thread); // steals ref if (PyList_Append(result, result_item)) { Py_DECREF(result_item); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); @@ -2352,14 +2444,15 @@ unwind_stack_for_thread( goto error; } - result = PyTuple_New(2); + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->ThreadInfo_Type); if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create thread unwind result tuple"); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); goto error; } - PyTuple_SET_ITEM(result, 0, thread_id); // Steals reference - PyTuple_SET_ITEM(result, 1, frame_info); // Steals reference + PyStructSequence_SetItem(result, 0, thread_id); // Steals reference + PyStructSequence_SetItem(result, 1, frame_info); // Steals reference cleanup_stack_chunks(&chunks); return result; @@ -2414,6 +2507,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, /*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/ { self->debug = debug; + self->cached_state = NULL; if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); return -1; @@ -2860,6 +2954,47 @@ _remote_debugging_exec(PyObject *m) if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { return -1; } + + // Initialize structseq types + st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc); + if (st->TaskInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->TaskInfo_Type) < 0) { + return -1; + } + + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); + if (st->FrameInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->FrameInfo_Type) < 0) { + return -1; + } + + st->CoroInfo_Type = PyStructSequence_NewType(&CoroInfo_desc); + if (st->CoroInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->CoroInfo_Type) < 0) { + return -1; + } + + st->ThreadInfo_Type = PyStructSequence_NewType(&ThreadInfo_desc); + if (st->ThreadInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->ThreadInfo_Type) < 0) { + return -1; + } + + st->AwaitedInfo_Type = PyStructSequence_NewType(&AwaitedInfo_desc); + if (st->AwaitedInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) { + return -1; + } #ifdef Py_GIL_DISABLED PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif @@ -2878,6 +3013,11 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) { RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_VISIT(state->RemoteDebugging_Type); + Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->FrameInfo_Type); + Py_VISIT(state->CoroInfo_Type); + Py_VISIT(state->ThreadInfo_Type); + Py_VISIT(state->AwaitedInfo_Type); return 0; } @@ -2886,6 +3026,11 @@ remote_debugging_clear(PyObject *mod) { RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_CLEAR(state->RemoteDebugging_Type); + Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->FrameInfo_Type); + Py_CLEAR(state->CoroInfo_Type); + Py_CLEAR(state->ThreadInfo_Type); + Py_CLEAR(state->AwaitedInfo_Type); return 0; } From fc413ecb8f4bf1c59b29932695e3538548eb1a8a Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 14 Jun 2025 18:07:19 +0300 Subject: [PATCH 564/989] gh-135504: Document `LIBZSTD_CFLAGS` and `LIBZSTD_LIBS` config options (#135505) --- Doc/using/configure.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index b914d3397b6dda..0b7eaf35a1e689 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -445,6 +445,14 @@ Options for third-party dependencies C compiler and linker flags for ``libuuid``, used by :mod:`uuid` module, overriding ``pkg-config``. +.. option:: LIBZSTD_CFLAGS +.. option:: LIBZSTD_LIBS + + C compiler and linker flags for ``libzstd``, used by :mod:`compression.zstd` module, + overriding ``pkg-config``. + + .. versionadded:: 3.14 + .. option:: PANEL_CFLAGS .. option:: PANEL_LIBS From 2e15a50851da66eb8227ec6ea07a9cc7ed08fbf3 Mon Sep 17 00:00:00 2001 From: Caleb Xu Date: Sat, 14 Jun 2025 12:04:16 -0400 Subject: [PATCH 565/989] gh-135497: fix `MAXLOGNAME` detection in `configure.ac` (#135508) --- .../next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst | 1 + .../next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst | 1 + configure | 2 +- configure.ac | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst create mode 100644 Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst diff --git a/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst b/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst new file mode 100644 index 00000000000000..c84663b14668d6 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst @@ -0,0 +1 @@ +Fix the detection of ``MAXLOGNAME`` in the ``configure.ac`` script. diff --git a/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst b/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst new file mode 100644 index 00000000000000..d3e81de9dbfd34 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst @@ -0,0 +1 @@ +Fix :func:`os.getlogin` failing for longer usernames on BSD-based platforms. diff --git a/configure b/configure index fef9f2d7da935c..6c4923e82a886d 100755 --- a/configure +++ b/configure @@ -23849,7 +23849,7 @@ fi -ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include +ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_MAXLOGNAME" = xyes then : diff --git a/configure.ac b/configure.ac index cc37a636c522ba..4f935aacef1665 100644 --- a/configure.ac +++ b/configure.ac @@ -5542,7 +5542,7 @@ AC_CHECK_DECL([MAXLOGNAME], [AC_DEFINE([HAVE_MAXLOGNAME], [1], [Define if you have the 'MAXLOGNAME' constant.])], [], - [@%:@include ]) + [@%:@include ]) AC_CHECK_DECLS([UT_NAMESIZE], [AC_DEFINE([HAVE_UT_NAMESIZE], [1], From 8979d3afe376c67931665070a79f6939ebcd940b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 15 Jun 2025 03:32:44 +0300 Subject: [PATCH 566/989] gh-135171: Update documentation for the generator expression (GH-135351) * gh-135171: Update documentation for the generator expression Document that the iterator for the leftmost "for" clause is created immediately. * Update Doc/reference/expressions.rst Co-authored-by: Brian Skinn --------- Co-authored-by: Brian Skinn --- Doc/reference/expressions.rst | 5 +++-- .../2025-06-10-17-02-06.gh-issue-135171.quHvts.rst | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 2a550b504ca765..3d3bf1d9840eef 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -406,8 +406,9 @@ brackets or curly braces. Variables used in the generator expression are evaluated lazily when the :meth:`~generator.__next__` method is called for the generator object (in the same fashion as normal generators). However, the iterable expression in the -leftmost :keyword:`!for` clause is immediately evaluated, so that an error -produced by it will be emitted at the point where the generator expression +leftmost :keyword:`!for` clause is immediately evaluated, and the +:term:`iterator` is immediately created for that iterable, so that an error +produced while creating the iterator will be emitted at the point where the generator expression is defined, rather than at the point where the first value is retrieved. Subsequent :keyword:`!for` clauses and any filter condition in the leftmost :keyword:`!for` clause cannot be evaluated in the enclosing scope as they may diff --git a/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst b/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst new file mode 100644 index 00000000000000..129ff74189bc64 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst @@ -0,0 +1,2 @@ +Document that the :term:`iterator` for the leftmost :keyword:`!for` clause +in the generator expression is created immediately. From 2bd3895fcabb2dfdae5c0c72e60483e3d3267f0f Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Sun, 15 Jun 2025 01:34:29 -0400 Subject: [PATCH 567/989] gh-127319: Disable port reuse on HTTP, XMLRPC, and logging TCP servers (GH-135405) Prior to issue #120485 these servers did not allow port reuse, which makes sense as the behavior of port reuse is surprising if you're not expecting it. It's unclear to me why these services were switched to allow port reuse, but I believe the desired behavior (unless subclasses opt in) is to not allow port reuse. See also: https://bugzilla.redhat.com/show_bug.cgi?id=2323170 --- Lib/http/server.py | 2 +- Lib/logging/config.py | 2 +- Lib/test/test_logging.py | 2 +- Lib/xmlrpc/server.py | 2 +- .../2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst | 3 +++ 5 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index ef10d185932633..a2ffbe2e44df64 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -115,7 +115,7 @@ class HTTPServer(socketserver.TCPServer): allow_reuse_address = True # Seems to make sense in testing environment - allow_reuse_port = True + allow_reuse_port = False def server_bind(self): """Override server_bind to store the server name.""" diff --git a/Lib/logging/config.py b/Lib/logging/config.py index c994349fd6eee5..3d9aa00fa52d11 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -1018,7 +1018,7 @@ class ConfigSocketReceiver(ThreadingTCPServer): """ allow_reuse_address = True - allow_reuse_port = True + allow_reuse_port = False def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT, handler=None, ready=None, verify=None): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index fa5b1e438168bc..e672dfcbb465cc 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -1036,7 +1036,7 @@ class TestTCPServer(ControlMixin, ThreadingTCPServer): """ allow_reuse_address = True - allow_reuse_port = True + allow_reuse_port = False def __init__(self, addr, handler, poll_interval=0.5, bind_and_activate=True): diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py index 90a356fbb8eae4..8130c739af2fe8 100644 --- a/Lib/xmlrpc/server.py +++ b/Lib/xmlrpc/server.py @@ -578,7 +578,7 @@ class SimpleXMLRPCServer(socketserver.TCPServer, """ allow_reuse_address = True - allow_reuse_port = True + allow_reuse_port = False # Warning: this is for debugging purposes only! Never set this to True in # production code, as will be sending out sensitive information (exception diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst new file mode 100644 index 00000000000000..d90153c96841df --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst @@ -0,0 +1,3 @@ +Set the ``allow_reuse_port`` class variable to ``False`` on the XMLRPC, +logging, and HTTP servers. This matches the behavior in prior Python +releases, which is to not allow port reuse. From 81237fbcf6adc962647566eafca62dd5a905375e Mon Sep 17 00:00:00 2001 From: Jacob Austin Lincoln <99031153+lincolnj1@users.noreply.github.com> Date: Sun, 15 Jun 2025 09:13:19 -0700 Subject: [PATCH 568/989] gh-65697: Improved error msg for configparser key validation (#135527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improved error msg for configparser key validation and added note in 3.14 whatsnew * Properly added change to configparser * 📜🤖 Added by blurb_it. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Doc/whatsnew/3.14.rst | 8 ++++++++ Lib/configparser.py | 11 +++++++---- .../2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 705bf46d603697..895446e2721ca5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1259,6 +1259,14 @@ concurrent.futures buffer. (Contributed by Enzo Bonnal and Josh Rosenberg in :gh:`74028`.) +configparser +------------ + +* Security fix: will no longer write config files it cannot read. Attempting + to :meth:`configparser.ConfigParser.write` keys containing delimiters or + beginning with the section header pattern will raise a + :class:`configparser.InvalidWriteError`. + (Contributed by Jacob Lincoln in :gh:`129270`) contextvars ----------- diff --git a/Lib/configparser.py b/Lib/configparser.py index 239fda60a02ca0..18af1eadaad111 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1218,11 +1218,14 @@ def _convert_to_boolean(self, value): def _validate_key_contents(self, key): """Raises an InvalidWriteError for any keys containing - delimiters or that match the section header pattern""" + delimiters or that begins with the section header pattern""" if re.match(self.SECTCRE, key): - raise InvalidWriteError("Cannot write keys matching section pattern") - if any(delim in key for delim in self._delimiters): - raise InvalidWriteError("Cannot write key that contains delimiters") + raise InvalidWriteError( + f"Cannot write key {key}; begins with section pattern") + for delim in self._delimiters: + if delim in key: + raise InvalidWriteError( + f"Cannot write key {key}; contains delimiter {delim}") def _validate_value_types(self, *, section="", option="", value=""): """Raises a TypeError for illegal non-string values. diff --git a/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst b/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst new file mode 100644 index 00000000000000..d374220d02f5ce --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst @@ -0,0 +1 @@ +:class:`configparser`'s error message when attempting to write an invalid key is now more helpful. From b9a1b049820517d270f5019d5eb52c8e452eb33f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 15 Jun 2025 20:32:32 +0200 Subject: [PATCH 569/989] gh-135371: Clean tags from pointers in all cases in remote debugging module (#135534) --- Modules/_remote_debugging_module.c | 106 ++++++++++++++++------------- 1 file changed, 60 insertions(+), 46 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 19f12c3b02e5a4..389889a2cf4c6c 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -39,6 +39,8 @@ * ============================================================================ */ #define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) +#define CLEAR_PTR_TAG(ptr) (((uintptr_t)(ptr) & ~Py_TAG_BITS)) +#define GET_MEMBER_NO_TAG(type, obj, offset) (type)(CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset)))) /* Size macros for opaque buffers */ #define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) @@ -243,6 +245,13 @@ module _remote_debugging * FORWARD DECLARATIONS * ============================================================================ */ +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +); + static int parse_tasks_in_set( RemoteUnwinderObject *unwinder, @@ -734,8 +743,7 @@ parse_task_name( return NULL; } - uintptr_t task_name_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); - task_name_addr &= ~Py_TAG_BITS; + uintptr_t task_name_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); // The task name can be a long or a string so we need to check the type char task_name_obj[SIZEOF_PYOBJECT]; @@ -798,8 +806,7 @@ static int parse_task_awaited_by( return -1; } - uintptr_t task_ab_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); - task_ab_addr &= ~Py_TAG_BITS; + uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); if ((void*)task_ab_addr == NULL) { return 0; @@ -849,8 +856,7 @@ handle_yield_from_frame( return -1; } - uintptr_t stackpointer_addr = GET_MEMBER(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); - stackpointer_addr &= ~Py_TAG_BITS; + uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); if ((void*)stackpointer_addr != NULL) { uintptr_t gi_await_addr; @@ -917,6 +923,11 @@ parse_coro_chain( return -1; } + int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state); + if (frame_state == FRAME_CLEARED) { + return 0; + } + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); PyObject* name = NULL; @@ -936,7 +947,7 @@ parse_coro_chain( } Py_DECREF(name); - if (GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state) == FRAME_SUSPENDED_YIELD_FROM) { + if (frame_state == FRAME_SUSPENDED_YIELD_FROM) { return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); } @@ -981,8 +992,7 @@ create_task_result( goto error; } - coro_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); - coro_addr &= ~Py_TAG_BITS; + coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); if ((void*)coro_addr != NULL) { if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { @@ -1816,10 +1826,10 @@ parse_frame_from_chunks( char *frame = (char *)frame_ptr; *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER || - !GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable)) { - return 0; + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; } uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); @@ -1832,9 +1842,7 @@ parse_frame_from_chunks( } #endif - return parse_code_object( - unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), - instruction_pointer, previous_frame, tlbc_index); + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); } /* ============================================================================ @@ -2077,6 +2085,33 @@ find_running_task_and_coro( * FRAME PARSING FUNCTIONS * ============================================================================ */ +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +) { + if ((void*)code_object_addr == NULL) { + return 0; + } + + void* frame = (void*)frame_addr; + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame + } + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR + && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); + return -1; + } + return 1; +} + static int parse_frame_object( RemoteUnwinderObject *unwinder, @@ -2098,13 +2133,10 @@ parse_frame_object( } *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER) { - return 0; - } - - if ((void*)GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable) == NULL) { - return 0; + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; } uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); @@ -2117,9 +2149,7 @@ parse_frame_object( } #endif - return parse_code_object( - unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), - instruction_pointer, previous_frame, tlbc_index); + return parse_code_object(unwinder, result, code_object,instruction_pointer, previous_frame, tlbc_index); } static int @@ -2144,26 +2174,10 @@ parse_async_frame_object( } *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - - *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); - // Strip tag bits for consistent comparison - *code_object &= ~Py_TAG_BITS; - assert(code_object != NULL); - if ((void*)*code_object == NULL) { - return 0; - } - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR - && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); - set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); - return -1; + *code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, *code_object); + if (frame_valid != 1) { + return frame_valid; } uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); From 076f87468a570c2a5e2e68d06af0a5267eb81713 Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Mon, 16 Jun 2025 02:49:49 +0800 Subject: [PATCH 570/989] gh-135361: update documentation for `remote_debugger_script` audit event (#135362) --- Doc/library/sys.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 55e442b20ff877..71f9999464ab52 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1933,6 +1933,13 @@ always available. Unless explicitly noted otherwise, all variables are read-only interpreter is pre-release (alpha, beta, or release candidate) then the local and remote interpreters must be the same exact version. + .. audit-event:: remote_debugger_script script_path + + When the script is executed in the remote process, an + :ref:`auditing event ` + ``sys.remote_debugger_script`` is raised + with the path in the remote process. + .. availability:: Unix, Windows. .. versionadded:: 3.14 From 54e29ea4eb7b54c888fd5764eef2215535e4d862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 15 Jun 2025 21:00:58 +0200 Subject: [PATCH 571/989] gh-111178: fix UBSan failures for `RemoteUnwinderObject` (#135539) --- Modules/_remote_debugging_module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 389889a2cf4c6c..c2421cac6bdb17 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -214,6 +214,8 @@ typedef struct { #endif } RemoteUnwinderObject; +#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) + typedef struct { int lineno; @@ -2913,8 +2915,9 @@ static PyMethodDef RemoteUnwinder_methods[] = { }; static void -RemoteUnwinder_dealloc(RemoteUnwinderObject *self) +RemoteUnwinder_dealloc(PyObject *op) { + RemoteUnwinderObject *self = RemoteUnwinder_CAST(op); PyTypeObject *tp = Py_TYPE(self); if (self->code_object_cache) { _Py_hashtable_destroy(self->code_object_cache); From 60181f4ed0e48ff35dc296da6b51473bfc553d16 Mon Sep 17 00:00:00 2001 From: Dan Lenski Date: Sun, 15 Jun 2025 12:29:38 -0700 Subject: [PATCH 572/989] gh-67022: Document bytes/str inconsistency in email.header.decode_header() and suggest email.headerregistry.HeaderRegistry as a sane alternative (#92900) * gh-67022: Document bytes/str inconsistency in email.header.decode_header() This function's possible return types have been surprising and error-prone for the entirety of its Python 3.x history. It can return either: 1. `typing.List[typing.Tuple[bytes, typing.Optional[str]]]` of length >1 2. or `typing.List[typing.Tuple[str, None]]`, of length exactly 1 This means that any user of this function must be prepared to accept either `bytes` or `str` for the first member of the 2-tuples it returns, which is a very surprising behavior in Python 3.x, particularly given that the second member of the tuple is supposed to represent the charset/encoding of the first member. This patch documents the behavior of this function, and adds test cases to demonstrate it. As discussed in bpo-22833, this cannot be changed in a backwards-compatible way, and some users of this function depend precisely on the existing behavior. Add warnings about obsolescence of 'email.header.decode_header' and 'email.header.make_header' functions. Recommend use of `email.headerregistry.HeaderRegistry` instead, as suggested in https://github.com/python/cpython/pull/92900#discussion_r1112472177 --- Doc/library/email.header.rst | 34 ++++++++++++++++++++++++++----- Lib/email/header.py | 17 ++++++++++++---- Lib/test/test_email/test_email.py | 12 +++++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index 219fad0d2f6745..c3392a62b8ee79 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -178,16 +178,36 @@ The :mod:`email.header` module also provides the following convenient functions. Decode a message header value without converting the character set. The header value is in *header*. - This function returns a list of ``(decoded_string, charset)`` pairs containing - each of the decoded parts of the header. *charset* is ``None`` for non-encoded - parts of the header, otherwise a lower case string containing the name of the - character set specified in the encoded string. + For historical reasons, this function may return either: - Here's an example:: + 1. A list of pairs containing each of the decoded parts of the header, + ``(decoded_bytes, charset)``, where *decoded_bytes* is always an instance of + :class:`bytes`, and *charset* is either: + + - A lower case string containing the name of the character set specified. + + - ``None`` for non-encoded parts of the header. + + 2. A list of length 1 containing a pair ``(string, None)``, where + *string* is always an instance of :class:`str`. + + An :exc:`email.errors.HeaderParseError` may be raised when certain decoding + errors occur (e.g. a base64 decoding exception). + + Here are examples: >>> from email.header import decode_header >>> decode_header('=?iso-8859-1?q?p=F6stal?=') [(b'p\xf6stal', 'iso-8859-1')] + >>> decode_header('unencoded_string') + [('unencoded_string', None)] + >>> decode_header('bar =?utf-8?B?ZsOzbw==?=') + [(b'bar ', None), (b'f\xc3\xb3o', 'utf-8')] + + .. note:: + + This function exists for for backwards compatibility only. For + new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. .. function:: make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' ') @@ -203,3 +223,7 @@ The :mod:`email.header` module also provides the following convenient functions. :class:`Header` instance. Optional *maxlinelen*, *header_name*, and *continuation_ws* are as in the :class:`Header` constructor. + .. note:: + + This function exists for for backwards compatibility only, and is + not recommended for use in new code. diff --git a/Lib/email/header.py b/Lib/email/header.py index 113a81f41314ec..220a84a7454b21 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -59,16 +59,22 @@ def decode_header(header): """Decode a message header value without converting charset. - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. + For historical reasons, this function may return either: + + 1. A list of length 1 containing a pair (str, None). + 2. A list of (bytes, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). + + This function exists for backwards compatibility only. For new code, we + recommend using email.headerregistry.HeaderRegistry instead. """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 8765d121fd0813..b8116d073a2670 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2568,6 +2568,18 @@ def test_multiline_header(self): self.assertEqual(str(make_header(decode_header(s))), '"Müller T" ') + def test_unencoded_ascii(self): + # bpo-22833/gh-67022: returns [(str, None)] rather than [(bytes, None)] + s = 'header without encoded words' + self.assertEqual(decode_header(s), + [('header without encoded words', None)]) + + def test_unencoded_utf8(self): + # bpo-22833/gh-67022: returns [(str, None)] rather than [(bytes, None)] + s = 'header with unexpected non ASCII caract\xe8res' + self.assertEqual(decode_header(s), + [('header with unexpected non ASCII caract\xe8res', None)]) + # Test the MIMEMessage class class TestMIMEMessage(TestEmailBase): From 667a86e076eeadc0d0e6e0f0da58882ed748865b Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Mon, 16 Jun 2025 13:25:50 +0800 Subject: [PATCH 573/989] gh-131798: JIT: replace _CHECK_METHOD_VERSION with _CHECK_FUNCTION_VERSION_INLINE (GH-135022) Signed-off-by: Manjusaka Co-authored-by: Brandt Bucher --- Lib/test/test_capi/test_opt.py | 22 +++++++++++++++++++ ...-06-02-20-13-37.gh-issue-131798.JQRFvR.rst | 1 + Python/optimizer_bytecodes.c | 10 +++++++++ Python/optimizer_cases.c.h | 10 +++++++++ 4 files changed, 43 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index bf22ef2a5922e7..3fc2cb33795a5f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1381,6 +1381,21 @@ def testfunc(n): # Removed guard self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops) + def test_method_guards_removed_or_reduced(self): + def testfunc(n): + result = 0 + for i in range(n): + result += test_bound_method(i) + return result + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, sum(range(TIER2_THRESHOLD))) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_PUSH_FRAME", uops) + # Strength reduced version + self.assertIn("_CHECK_FUNCTION_VERSION_INLINE", uops) + self.assertNotIn("_CHECK_METHOD_VERSION", uops) + def test_jit_error_pops(self): """ Tests that the correct number of pops are inserted into the @@ -2294,5 +2309,12 @@ def testfunc(n): def global_identity(x): return x +class TestObject: + def test(self, *args, **kwargs): + return args[0] + +test_object = TestObject() +test_bound_method = TestObject.test.__get__(test_object) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst new file mode 100644 index 00000000000000..0e68c793e5e133 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst @@ -0,0 +1 @@ +Optimize ``_CHECK_METHOD_VERSION`` into ``_CHECK_FUNCTION_VERSION_INLINE`` in JIT-compiled code. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c2469547d77943..5a9a3a943a7b02 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -672,6 +672,16 @@ dummy_func(void) { sym_set_type(callable, &PyFunction_Type); } + op(_CHECK_METHOD_VERSION, (func_version/2, callable, null, unused[oparg] -- callable, null, unused[oparg])) { + if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) { + PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable); + assert(PyMethod_Check(method)); + REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version); + this_instr->operand1 = (uintptr_t)method->im_func; + } + sym_set_type(callable, &PyMethod_Type); + } + op(_CHECK_FUNCTION_EXACT_ARGS, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { assert(sym_matches_type(callable, &PyFunction_Type)); if (sym_is_const(ctx, callable)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index d9313be0bb0df6..8c4f0399c75a73 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1833,6 +1833,16 @@ } case _CHECK_METHOD_VERSION: { + JitOptSymbol *callable; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)this_instr->operand0; + if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) { + PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable); + assert(PyMethod_Check(method)); + REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version); + this_instr->operand1 = (uintptr_t)method->im_func; + } + sym_set_type(callable, &PyMethod_Type); break; } From b102f091feebe8597d14ac69b9fd982a4cda6c8d Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 16 Jun 2025 07:11:49 -0300 Subject: [PATCH 574/989] gh-120608: fix NEWS entry typo (#135535) --- Misc/NEWS.d/3.14.0a6.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/3.14.0a6.rst b/Misc/NEWS.d/3.14.0a6.rst index bafd8845de6973..d8840b6f283e76 100644 --- a/Misc/NEWS.d/3.14.0a6.rst +++ b/Misc/NEWS.d/3.14.0a6.rst @@ -1325,7 +1325,7 @@ variable. .. nonce: d75n8U .. section: Core and Builtins -Adapt :func:`reversed` for use in the free-theading build. The +Adapt :func:`reversed` for use in the free-threading build. The :func:`reversed` is still not thread-safe in the sense that concurrent iterations may see the same object, but they will not corrupt the interpreter state. From f0799795994bfd9ab0740c4d70ac54270991ba47 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Jun 2025 23:32:52 +0900 Subject: [PATCH 575/989] gh-119132: Remove "experimental" tag from the CPython free-threading. (gh-135550) * gh-119132: Remove "experimental" tag from the CPython free-threading build * Address code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * Add NEWS.d * Regen configure.ac * Update doc * Update * Update * Update * Update Doc/howto/free-threading-python.rst Co-authored-by: Peter Bierma * Update ctypes.rst * Update * Update Doc/howto/free-threading-python.rst Co-authored-by: T. Wouters * Apply suggestions from code review Co-authored-by: T. Wouters Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --------- Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Peter Bierma Co-authored-by: T. Wouters --- Doc/howto/free-threading-extensions.rst | 4 ++-- Doc/howto/free-threading-python.rst | 17 ++++++++++------- Doc/library/ctypes.rst | 2 +- Doc/library/threading.rst | 2 +- Doc/using/configure.rst | 4 ++-- Lib/platform.py | 2 +- ...25-06-16-07-20-28.gh-issue-119132.fcI8s7.rst | 1 + PCbuild/build.bat | 2 +- Python/getversion.c | 2 +- configure | 4 ++-- configure.ac | 2 +- 11 files changed, 23 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 175bb5dc831848..02b45879ccfaca 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -6,8 +6,8 @@ C API Extension Support for Free Threading ****************************************** -Starting with the 3.13 release, CPython has experimental support for running -with the :term:`global interpreter lock` (GIL) disabled in a configuration +Starting with the 3.13 release, CPython has support for running with +the :term:`global interpreter lock` (GIL) disabled in a configuration called :term:`free threading`. This document describes how to adapt C API extensions to support free threading. diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index c33cef2c8e935b..24069617c47ae1 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -1,18 +1,21 @@ .. _freethreading-python-howto: -********************************************** -Python experimental support for free threading -********************************************** +********************************* +Python support for free threading +********************************* -Starting with the 3.13 release, CPython has experimental support for a build of +Starting with the 3.13 release, CPython has support for a build of Python called :term:`free threading` where the :term:`global interpreter lock` (GIL) is disabled. Free-threaded execution allows for full utilization of the available processing power by running threads in parallel on available CPU cores. While not all software will benefit from this automatically, programs designed with threading in mind will run faster on multi-core hardware. -**The free-threaded mode is experimental** and work is ongoing to improve it: -expect some bugs and a substantial single-threaded performance hit. +The free-threaded mode is working and continues to be improved, but +there is some additional overhead in single-threaded workloads compared +to the regular build. Additionally, third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. This document describes the implications of free threading for Python code. See :ref:`freethreading-extensions-howto` for information on @@ -43,7 +46,7 @@ Identifying free-threaded Python ================================ To check if the current interpreter supports free-threading, :option:`python -VV <-V>` -and :data:`sys.version` contain "experimental free-threading build". +and :data:`sys.version` contain "free-threading build". The new :func:`sys._is_gil_enabled` function can be used to check whether the GIL is actually disabled in the running process. diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 2ee4450698a107..e00fe9c8145f12 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -882,7 +882,7 @@ invalid non-\ ``NULL`` pointers would crash Python):: Thread safety without the GIL ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In Python 3.13, the :term:`GIL` may be disabled on :term:`experimental free threaded ` builds. +From Python 3.13 onward, the :term:`GIL` can be disabled on :term:`free threaded ` builds. In ctypes, reads and writes to a single object concurrently is safe, but not across multiple objects: .. code-block:: pycon diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 7edcdcabdce1df..52fefd590daf18 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -102,7 +102,7 @@ CPU-bound tasks, as only one thread can execute Python bytecode at a time. Despite this, threads remain a useful tool for achieving concurrency in many scenarios. -As of Python 3.13, experimental :term:`free-threaded ` builds +As of Python 3.13, :term:`free-threaded ` builds can disable the GIL, enabling true parallel execution of threads, but this feature is not available by default (see :pep:`703`). diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 0b7eaf35a1e689..df81a330549147 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -290,8 +290,8 @@ General Options .. option:: --disable-gil - Enables **experimental** support for running Python without the - :term:`global interpreter lock` (GIL): free threading build. + Enables support for running Python without the :term:`global interpreter + lock` (GIL): free threading build. Defines the ``Py_GIL_DISABLED`` macro and adds ``"t"`` to :data:`sys.abiflags`. diff --git a/Lib/platform.py b/Lib/platform.py index 077db81264a57c..e7f180fc5ac3a3 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -1144,7 +1144,7 @@ def _sys_version(sys_version=None): # CPython cpython_sys_version_parser = re.compile( r'([\w.+]+)\s*' # "version" - r'(?:experimental free-threading build\s+)?' # "free-threading-build" + r'(?:free-threading build\s+)?' # "free-threading-build" r'\(#?([^,]+)' # "(#buildno" r'(?:,\s*([\w ]*)' # ", builddate" r'(?:,\s*([\w :]*))?)?\)\s*' # ", buildtime)" diff --git a/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst b/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst new file mode 100644 index 00000000000000..3eb0805b9ce1be --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst @@ -0,0 +1 @@ +Remove "experimental" tag from the CPython free-threading build. diff --git a/PCbuild/build.bat b/PCbuild/build.bat index 2f358991e484ce..602357048867d6 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -33,7 +33,7 @@ echo. -k Attempt to kill any running Pythons before building (usually done echo. automatically by the pythoncore project) echo. --pgo Build with Profile-Guided Optimization. This flag echo. overrides -c and -d -echo. --disable-gil Enable experimental support for running without the GIL. +echo. --disable-gil Enable support for running without the GIL. echo. --test-marker Enable the test marker within the build. echo. --regen Regenerate all opcodes, grammar and tokens. echo. --experimental-jit Enable the experimental just-in-time compiler. diff --git a/Python/getversion.c b/Python/getversion.c index 226b2f999a6bfd..8d8bc6ea70048c 100644 --- a/Python/getversion.c +++ b/Python/getversion.c @@ -15,7 +15,7 @@ void _Py_InitVersion(void) } initialized = 1; #ifdef Py_GIL_DISABLED - const char *buildinfo_format = "%.80s experimental free-threading build (%.80s) %.80s"; + const char *buildinfo_format = "%.80s free-threading build (%.80s) %.80s"; #else const char *buildinfo_format = "%.80s (%.80s) %.80s"; #endif diff --git a/configure b/configure index 6c4923e82a886d..2b1482d9264d24 100755 --- a/configure +++ b/configure @@ -1826,8 +1826,8 @@ Optional Features: no) --enable-profiling enable C-level code profiling with gprof (default is no) - --disable-gil enable experimental support for running without the - GIL (default is no) + --disable-gil enable support for running without the GIL (default + is no) --enable-pystats enable internal statistics gathering (default is no) --enable-optimizations enable expensive, stable optimizations (PGO, etc.) (default is no) diff --git a/configure.ac b/configure.ac index 4f935aacef1665..7dbb332f1d30a7 100644 --- a/configure.ac +++ b/configure.ac @@ -1716,7 +1716,7 @@ ABI_THREAD="" # --disable-gil AC_MSG_CHECKING([for --disable-gil]) AC_ARG_ENABLE([gil], - [AS_HELP_STRING([--disable-gil], [enable experimental support for running without the GIL (default is no)])], + [AS_HELP_STRING([--disable-gil], [enable support for running without the GIL (default is no)])], [AS_VAR_IF([enable_gil], [yes], [disable_gil=no], [disable_gil=yes])], [disable_gil=no] ) AC_MSG_RESULT([$disable_gil]) From 4c15505071498439407483004721d0369f110229 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 16 Jun 2025 19:12:20 +0300 Subject: [PATCH 576/989] gh-135513: Fix unused variable warning in `crossinterp.c` (#135514) --- Python/crossinterp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 39c7ea698904bd..0bd267e07d5f2b 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -2619,7 +2619,9 @@ _PyXI_Enter(_PyXI_session *session, PyInterpreterState *interp, PyObject *nsupdates, _PyXI_session_result *result) { - PyThreadState *tstate = _PyThreadState_GET(); +#ifndef NDEBUG + PyThreadState *tstate = _PyThreadState_GET(); // Only used for asserts +#endif // Convert the attrs for cross-interpreter use. _PyXI_namespace *sharedns = NULL; @@ -2661,7 +2663,9 @@ _PyXI_Enter(_PyXI_session *session, _enter_session(session, interp); _PyXI_failure override = XI_FAILURE_INIT; override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; +#ifndef NDEBUG tstate = _PyThreadState_GET(); +#endif // Ensure this thread owns __main__. if (_PyInterpreterState_SetRunningMain(interp) < 0) { @@ -2697,7 +2701,9 @@ _PyXI_Enter(_PyXI_session *session, // Exit the session. _exit_session(session); +#ifndef NDEBUG tstate = _PyThreadState_GET(); +#endif if (sharedns != NULL) { _destroy_sharedns(sharedns); From d8994b0a77cc9821772d05db00a6ab23382fa17d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 16 Jun 2025 12:55:20 -0400 Subject: [PATCH 577/989] gh-132617: Fix `dict.update()` mutation check (gh-134815) Use `ma_used` instead of `ma_keys->dk_nentries` for modification check so that we only check if the dictionary is modified, not if new keys are added to a different dictionary that shared the same keys object. --- Lib/test/test_dict.py | 32 +++++++++++++++++++ ...-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst | 3 ++ Objects/dictobject.c | 4 +-- 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 52c38e42eca02d..60c62430370e96 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -290,6 +290,38 @@ def badgen(): ['Cannot convert dictionary update sequence element #0 to a sequence'], ) + def test_update_shared_keys(self): + class MyClass: pass + + # Subclass str to enable us to create an object during the + # dict.update() call. + class MyStr(str): + def __hash__(self): + return super().__hash__() + + def __eq__(self, other): + # Create an object that shares the same PyDictKeysObject as + # obj.__dict__. + obj2 = MyClass() + obj2.a = "a" + obj2.b = "b" + obj2.c = "c" + return super().__eq__(other) + + obj = MyClass() + obj.a = "a" + obj.b = "b" + + x = {} + x[MyStr("a")] = MyStr("a") + + # gh-132617: this previously raised "dict mutated during update" error + x.update(obj.__dict__) + + self.assertEqual(x, { + MyStr("a"): "a", + "b": "b", + }) def test_fromkeys(self): self.assertEqual(dict.fromkeys('abc'), {'a':None, 'b':None, 'c':None}) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst new file mode 100644 index 00000000000000..53aef541e64c23 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst @@ -0,0 +1,3 @@ +Fix :meth:`dict.update` modification check that could incorrectly raise a +"dict mutated during update" error when a different dictionary was modified +that happens to share the same underlying keys object. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index fd8ccf56324207..72901377cbb3da 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3858,7 +3858,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe } } - Py_ssize_t orig_size = other->ma_keys->dk_nentries; + Py_ssize_t orig_size = other->ma_used; Py_ssize_t pos = 0; Py_hash_t hash; PyObject *key, *value; @@ -3892,7 +3892,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe if (err != 0) return -1; - if (orig_size != other->ma_keys->dk_nentries) { + if (orig_size != other->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dict mutated during update"); return -1; From 21bac3aecd03805ea74ca67a3a291a9eb347588f Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 17 Jun 2025 01:07:25 +0300 Subject: [PATCH 578/989] Use replacements to update versions in "Using Python on macOS" (#130400) --- Doc/conf.py | 4 ++ Doc/using/mac.rst | 93 ++++++++++++++++++++++++----------------------- 2 files changed, 51 insertions(+), 46 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index b08f5452901b15..161c2986441edd 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -79,6 +79,10 @@ rst_epilog = f""" .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` +.. |python_x_dot_y_t_literal| replace:: ``python{version}t`` +.. |python_x_dot_y_t_literal_config| replace:: ``python{version}t-config`` +.. |x_dot_y_b2_literal| replace:: ``{version}.0b2`` +.. |applications_python_version_literal| replace:: ``/Applications/Python {version}/`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` .. Apparently this how you hack together a formatted link: diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index 4b6c884f3d4f25..d44f55b5cc9581 100644 --- a/Doc/using/mac.rst +++ b/Doc/using/mac.rst @@ -20,13 +20,6 @@ the Pythons provided by the CPython release team for download from the `python.org website `_. See :ref:`alternative_bundles` for some other options. -.. |usemac_x_dot_y| replace:: 3.13 -.. |usemac_python_x_dot_y_literal| replace:: ``python3.13`` -.. |usemac_python_x_dot_y_t_literal| replace:: ``python3.13t`` -.. |usemac_python_x_dot_y_t_literal_config| replace:: ``python3.13t-config`` -.. |usemac_applications_folder_name| replace:: ``Python 3.13`` -.. |usemac_applications_folder_version| replace:: ``/Applications/Python 3.13/`` - .. _getting-osx: .. _getting-and-installing-macpython: @@ -64,7 +57,7 @@ Clicking on the **Continue** button brings up the **Read Me** for this installer Besides other important information, the **Read Me** documents which Python version is going to be installed and on what versions of macOS it is supported. You may need to scroll through to read the whole file. By default, this **Read Me** will also be -installed in |usemac_applications_folder_version| and available to read anytime. +installed in |applications_python_version_literal| and available to read anytime. .. image:: mac_installer_02_readme.png @@ -97,7 +90,7 @@ When the installation is complete, the **Summary** window will appear. .. image:: mac_installer_06_summary.png Double-click on the :command:`Install Certificates.command` -icon or file in the |usemac_applications_folder_version| window to complete the +icon or file in the |applications_python_version_literal| window to complete the installation. .. image:: mac_installer_07_applications.png @@ -114,7 +107,7 @@ Close this terminal window and the installer window. A default install will include: -* A |usemac_applications_folder_name| folder in your :file:`Applications` folder. In here +* A |python_version_literal| folder in your :file:`Applications` folder. In here you find :program:`IDLE`, the development environment that is a standard part of official Python distributions; and :program:`Python Launcher`, which handles double-clicking Python scripts from the macOS `Finder `_. @@ -141,7 +134,7 @@ How to run a Python script There are two ways to invoke the Python interpreter. If you are familiar with using a Unix shell in a terminal -window, you can invoke |usemac_python_x_dot_y_literal| or ``python3`` optionally +window, you can invoke |python_x_dot_y_literal| or ``python3`` optionally followed by one or more command line options (described in :ref:`using-on-general`). The Python tutorial also has a useful section on :ref:`using Python interactively from a shell `. @@ -160,7 +153,7 @@ for more information. To run a Python script file from the terminal window, you can invoke the interpreter with the name of the script file: - |usemac_python_x_dot_y_literal| ``myscript.py`` + |python_x_dot_y_literal| ``myscript.py`` To run your script from the Finder, you can either: @@ -268,7 +261,7 @@ Installing Free-threaded Binaries The ``python.org`` :ref:`Python for macOS ` installer package can optionally install an additional build of -Python |usemac_x_dot_y| that supports :pep:`703`, the experimental free-threading feature +Python |version| that supports :pep:`703`, the experimental free-threading feature (running with the :term:`global interpreter lock` disabled). Check the release page on ``python.org`` for possible updated information. @@ -282,46 +275,54 @@ step of the installer as described above. If the box next to the **Free-threaded Python** package name is checked, a separate :file:`PythonT.framework` will also be installed alongside the normal :file:`Python.framework` in :file:`/Library/Frameworks`. -This configuration allows a free-threaded Python |usemac_x_dot_y| build to co-exist -on your system with a traditional (GIL only) Python |usemac_x_dot_y| build with +This configuration allows a free-threaded Python |version| build to co-exist +on your system with a traditional (GIL only) Python |version| build with minimal risk while installing or testing. This installation layout is itself experimental and is subject to change in future releases. Known cautions and limitations: - The **UNIX command-line tools** package, which is selected by default, - will install links in :file:`/usr/local/bin` for |usemac_python_x_dot_y_t_literal|, - the free-threaded interpreter, and |usemac_python_x_dot_y_t_literal_config|, + will install links in :file:`/usr/local/bin` for |python_x_dot_y_t_literal|, + the free-threaded interpreter, and |python_x_dot_y_t_literal_config|, a configuration utility which may be useful for package builders. Since :file:`/usr/local/bin` is typically included in your shell ``PATH``, in most cases no changes to your ``PATH`` environment variables should - be needed to use |usemac_python_x_dot_y_t_literal|. + be needed to use |python_x_dot_y_t_literal|. - For this release, the **Shell profile updater** package and the - :file:`Update Shell Profile.command` in |usemac_applications_folder_version| + :file:`Update Shell Profile.command` in |applications_python_version_literal| do not support the free-threaded package. - The free-threaded build and the traditional build have separate search paths and separate :file:`site-packages` directories so, by default, if you need a package available in both builds, it may need to be installed in both. The free-threaded package will install a separate instance of :program:`pip` for use - with |usemac_python_x_dot_y_t_literal|. + with |python_x_dot_y_t_literal|. - To install a package using :command:`pip` without a :command:`venv`: - |usemac_python_x_dot_y_t_literal| ``-m pip install `` + .. parsed-literal:: + + python\ |version|\ t -m pip install - When working with multiple Python environments, it is usually safest and easiest to :ref:`create and use virtual environments `. This can avoid possible command name conflicts and confusion about which Python is in use: - |usemac_python_x_dot_y_t_literal| ``-m venv `` + .. parsed-literal:: + + python\ |version|\ t -m venv + then :command:`activate`. - To run a free-threaded version of IDLE: - |usemac_python_x_dot_y_t_literal| ``-m idlelib`` + .. parsed-literal:: + + python\ |version|\ t -m idlelib + - The interpreters in both builds respond to the same :ref:`PYTHON environment variables ` @@ -337,28 +338,28 @@ Known cautions and limitations: thus it only needs to be run once. - If you cannot depend on the link in ``/usr/local/bin`` pointing to the - ``python.org`` free-threaded |usemac_python_x_dot_y_t_literal| (for example, if you want + ``python.org`` free-threaded |python_x_dot_y_t_literal| (for example, if you want to install your own version there or some other distribution does), you can explicitly set your shell ``PATH`` environment variable to include the ``PythonT`` framework ``bin`` directory: - .. code-block:: sh + .. parsed-literal:: - export PATH="/Library/Frameworks/PythonT.framework/Versions/3.13/bin":"$PATH" + export PATH="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin":"$PATH" The traditional framework installation by default does something similar, except for :file:`Python.framework`. Be aware that having both framework ``bin`` directories in ``PATH`` can lead to confusion if there are duplicate names - like ``python3.13`` in both; which one is actually used depends on the order + like |python_x_dot_y_literal| in both; which one is actually used depends on the order they appear in ``PATH``. The ``which python3.x`` or ``which python3.xt`` commands can show which path is being used. Using virtual environments can help avoid such ambiguities. Another option might be to create a shell :command:`alias` to the desired interpreter, like: - .. code-block:: sh + .. parsed-literal:: - alias py3.13="/Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13" - alias py3.13t="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + alias py\ |version|\ ="/Library/Frameworks/Python.framework/Versions/\ |version|\ /bin/python\ |version|\ " + alias py\ |version|\ t="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin/python\ |version|\ t" Installing using the command line --------------------------------- @@ -369,22 +370,22 @@ the macOS command line :command:`installer` utility lets you select non-default options, too. If you are not familiar with :command:`installer`, it can be somewhat cryptic (see :command:`man installer` for more information). As an example, the following shell snippet shows one way to do it, -using the ``3.13.0b2`` release and selecting the free-threaded interpreter +using the |x_dot_y_b2_literal| release and selecting the free-threaded interpreter option: -.. code-block:: sh +.. parsed-literal:: - RELEASE="python-3.13.0b2-macos11.pkg" + RELEASE="python-\ |version|\ 0b2-macos11.pkg" # download installer pkg - curl -O https://www.python.org/ftp/python/3.13.0/${RELEASE} + curl -O \https://www.python.org/ftp/python/\ |version|\ .0/${RELEASE} # create installer choicechanges to customize the install: - # enable the PythonTFramework-3.13 package + # enable the PythonTFramework-\ |version|\ package # while accepting the other defaults (install all other packages) cat > ./choicechanges.plist < - + @@ -393,7 +394,7 @@ option: choiceAttribute selected choiceIdentifier - org.python.Python.PythonTFramework-3.13 + org.python.Python.PythonTFramework-\ |version|\ @@ -404,19 +405,19 @@ option: You can then test that both installer builds are now available with something like: -.. code-block:: console +.. parsed-literal:: $ # test that the free-threaded interpreter was installed if the Unix Command Tools package was enabled - $ /usr/local/bin/python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ t -VV + Python \ |version|\ .0b2 experimental free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] $ # and the traditional interpreter - $ /usr/local/bin/python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] $ # test that they are also available without the prefix if /usr/local/bin is on $PATH - $ python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] - $ python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ t -VV + Python \ |version|\ .0b2 experimental free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] .. note:: From 730133a0acae4d0c340a7f546733af05d4a82fa3 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Mon, 16 Jun 2025 19:03:53 -0400 Subject: [PATCH 579/989] Add Peter Bierma as a codeowner for the object and runtime lifecycle (#135588) --- .github/CODEOWNERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 63a28490043899..7edadff2754e4d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,6 +35,7 @@ Objects/type* @markshannon Objects/codeobject.c @markshannon Objects/frameobject.c @markshannon Objects/call.c @markshannon +Objects/object.c @ZeroIntensity Python/ceval*.c @markshannon Python/ceval*.h @markshannon Python/codegen.c @markshannon @iritkatriel @@ -66,8 +67,8 @@ Doc/_static/** @AA-Turner @hugovk Doc/tools/** @AA-Turner @hugovk # runtime state/lifecycle -**/*pylifecycle* @ericsnowcurrently -**/*pystate* @ericsnowcurrently +**/*pylifecycle* @ericsnowcurrently @ZeroIntensity +**/*pystate* @ericsnowcurrently @ZeroIntensity **/*preconfig* @ericsnowcurrently **/*initconfig* @ericsnowcurrently **/*pathconfig* @ericsnowcurrently From 68b7e1a6677d7a8fb47fbd28cb5d39a87217273c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 16 Jun 2025 16:17:17 -0700 Subject: [PATCH 580/989] gh-128627: Emscripten: Add missing semicolon in ios detection code (#135590) --- Python/emscripten_trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index 975c28eec104c6..75b98a047234d8 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -80,7 +80,7 @@ function getPyEMCountArgsPtr() { // To differentiate, we check if the platform is 'MacIntel' (common for Macs and newer iPads) // AND if the device has multi-touch capabilities (navigator.maxTouchPoints > 1) (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1) - ) + ); if (isIOS) { return 0; } From a450a0ddec7a2813fb4603f0df406fa33750654a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 16 Jun 2025 17:34:19 -0600 Subject: [PATCH 581/989] gh-135443: Sometimes Fall Back to __main__.__dict__ For Globals (gh-135491) For several builtin functions, we now fall back to __main__.__dict__ for the globals when there is no current frame and _PyInterpreterState_IsRunningMain() returns true. This allows those functions to be run with Interpreter.call(). The affected builtins: * exec() * eval() * globals() * locals() * vars() * dir() We take a similar approach with "stateless" functions, which don't use any global variables. --- Include/internal/pycore_ceval.h | 10 ++ Lib/test/test_interpreters/test_api.py | 107 ++++++++++++++++ Objects/object.c | 20 ++- Python/bltinmodule.c | 163 +++++++++++++++++-------- Python/ceval.c | 119 +++++++++++++++++- Python/crossinterp_data_lookup.h | 24 ++-- Python/import.c | 15 ++- 7 files changed, 392 insertions(+), 66 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 239177deb4a948..18623cc8f1c945 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -239,6 +239,16 @@ static inline void _Py_LeaveRecursiveCall(void) { extern _PyInterpreterFrame* _PyEval_GetFrame(void); +extern PyObject * _PyEval_GetGlobalsFromRunningMain(PyThreadState *); +extern int _PyEval_EnsureBuiltins( + PyThreadState *, + PyObject *, + PyObject **p_builtins); +extern int _PyEval_EnsureBuiltinsWithModule( + PyThreadState *, + PyObject *, + PyObject **p_builtins); + PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 1403cd145b6787..a409c1a691818e 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1414,6 +1414,113 @@ def test_call_invalid(self): with self.assertRaises(interpreters.NotShareableError): interp.call(func, op, 'eggs!') + def test_callable_requires_frame(self): + # There are various functions that require a current frame. + interp = interpreters.create() + for call, expected in [ + ((eval, '[1, 2, 3]'), + [1, 2, 3]), + ((eval, 'sum([1, 2, 3])'), + 6), + ((exec, '...'), + None), + ]: + with self.subTest(str(call)): + res = interp.call(*call) + self.assertEqual(res, expected) + + result_not_pickleable = [ + globals, + locals, + vars, + ] + for func, expectedtype in { + globals: dict, + locals: dict, + vars: dict, + dir: list, + }.items(): + with self.subTest(str(func)): + if func in result_not_pickleable: + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + else: + res = interp.call(func) + self.assertIsInstance(res, expectedtype) + self.assertIn('__builtins__', res) + + def test_globals_from_builtins(self): + # The builtins exec(), eval(), globals(), locals(), vars(), + # and dir() each runs relative to the target interpreter's + # __main__ module, when called directly. However, + # globals(), locals(), and vars() don't work when called + # directly so we don't check them. + from _frozen_importlib import BuiltinImporter + interp = interpreters.create() + + names = interp.call(dir) + self.assertEqual(names, [ + '__builtins__', + '__doc__', + '__loader__', + '__name__', + '__package__', + '__spec__', + ]) + + values = {name: interp.call(eval, name) + for name in names if name != '__builtins__'} + self.assertEqual(values, { + '__name__': '__main__', + '__doc__': None, + '__spec__': None, # It wasn't imported, so no module spec? + '__package__': None, + '__loader__': BuiltinImporter, + }) + with self.assertRaises(ExecutionFailed): + interp.call(eval, 'spam'), + + interp.call(exec, f'assert dir() == {names}') + + # Update the interpreter's __main__. + interp.prepare_main(spam=42) + expected = names + ['spam'] + + names = interp.call(dir) + self.assertEqual(names, expected) + + value = interp.call(eval, 'spam') + self.assertEqual(value, 42) + + interp.call(exec, f'assert dir() == {expected}, dir()') + + def test_globals_from_stateless_func(self): + # A stateless func, which doesn't depend on any globals, + # doesn't go through pickle, so it runs in __main__. + def set_global(name, value): + globals()[name] = value + + def get_global(name): + return globals().get(name) + + interp = interpreters.create() + + modname = interp.call(get_global, '__name__') + self.assertEqual(modname, '__main__') + + res = interp.call(get_global, 'spam') + self.assertIsNone(res) + + interp.exec('spam = True') + res = interp.call(get_global, 'spam') + self.assertTrue(res) + + interp.call(set_global, 'spam', 42) + res = interp.call(get_global, 'spam') + self.assertEqual(res, 42) + + interp.exec('assert spam == 42, repr(spam)') + def test_call_in_thread(self): interp = interpreters.create() diff --git a/Objects/object.c b/Objects/object.c index eff3a9862129a2..6a581a19604f9d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2084,9 +2084,25 @@ _dir_locals(void) PyObject *names; PyObject *locals; - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (!_PyErr_Occurred(tstate)) { + locals = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(locals); + } + } + if (locals == NULL) { return NULL; + } names = PyMapping_Keys(locals); Py_DECREF(locals); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index e08c63924ca16d..51d7297ec243cc 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -957,6 +957,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals) /*[clinic end generated code: output=0a0824aa70093116 input=7c7bce5299a89062]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *result = NULL, *source_copy; const char *str; @@ -970,35 +971,46 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, : "globals must be a dict"); return NULL; } - if (globals == Py_None) { + + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); - } + assert(globals != NULL); + Py_INCREF(globals); } - else if (locals == Py_None) - locals = Py_NewRef(globals); else { - Py_INCREF(locals); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + } + return NULL; + } + Py_INCREF(globals); } - if (globals == NULL || locals == NULL) { - PyErr_SetString(PyExc_TypeError, - "eval must be given globals and locals " - "when called without a frame"); - goto error; + if (locals != Py_None) { + Py_INCREF(locals); } - - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); + return NULL; + } + } + else { + locals = Py_NewRef(globals); } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1039,6 +1051,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, } error: + Py_XDECREF(globals); Py_XDECREF(locals); return result; } @@ -1069,29 +1082,44 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals, PyObject *closure) /*[clinic end generated code: output=7579eb4e7646743d input=25e989b6d87a3a21]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *v; - if (globals == Py_None) { + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); + assert(globals != NULL); + Py_INCREF(globals); + } + else { + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_SystemError, + "globals and locals cannot be NULL"); + } + goto error; } - if (!globals || !locals) { - PyErr_SetString(PyExc_SystemError, - "globals and locals cannot be NULL"); + Py_INCREF(globals); + } + + if (locals != Py_None) { + Py_INCREF(locals); + } + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); return NULL; } } - else if (locals == Py_None) { - locals = Py_NewRef(globals); - } else { - Py_INCREF(locals); + locals = Py_NewRef(globals); } if (!PyDict_Check(globals)) { @@ -1105,11 +1133,8 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, Py_TYPE(locals)->tp_name); goto error; } - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); - } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1186,11 +1211,13 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, } if (v == NULL) goto error; + Py_DECREF(globals); Py_DECREF(locals); Py_DECREF(v); Py_RETURN_NONE; error: + Py_XDECREF(globals); Py_XDECREF(locals); return NULL; } @@ -1240,10 +1267,21 @@ static PyObject * builtin_globals_impl(PyObject *module) /*[clinic end generated code: output=e5dd1527067b94d2 input=9327576f92bb48ba]*/ { - PyObject *d; - - d = PyEval_GetGlobals(); - return Py_XNewRef(d); + PyObject *globals; + if (_PyEval_GetFrame() != NULL) { + globals = PyEval_GetGlobals(); + assert(globals != NULL); + return Py_NewRef(globals); + } + PyThreadState *tstate = _PyThreadState_GET(); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(globals); } @@ -1887,7 +1925,21 @@ static PyObject * builtin_locals_impl(PyObject *module) /*[clinic end generated code: output=b46c94015ce11448 input=7874018d478d5c4b]*/ { - return _PyEval_GetFrameLocals(); + PyObject *locals; + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + assert(locals != NULL || PyErr_Occurred()); + return locals; + } + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(locals); } @@ -2623,7 +2675,22 @@ builtin_vars(PyObject *self, PyObject *args) if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) return NULL; if (v == NULL) { - d = _PyEval_GetFrameLocals(); + if (_PyEval_GetFrame() != NULL) { + d = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + d = _PyEval_GetGlobalsFromRunningMain(tstate); + if (d == NULL) { + if (!_PyErr_Occurred(tstate)) { + d = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(d); + } + } } else { if (PyObject_GetOptionalAttr(v, &_Py_ID(__dict__), &d) == 0) { diff --git a/Python/ceval.c b/Python/ceval.c index 4cfe4bb88f4e48..3da6f61f5acde5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2746,10 +2746,9 @@ _PyEval_GetFrameLocals(void) return locals; } -PyObject * -PyEval_GetGlobals(void) +static PyObject * +_PyEval_GetGlobals(PyThreadState *tstate) { - PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = _PyThreadState_GetFrame(tstate); if (current_frame == NULL) { return NULL; @@ -2757,6 +2756,120 @@ PyEval_GetGlobals(void) return current_frame->f_globals; } +PyObject * +PyEval_GetGlobals(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + return _PyEval_GetGlobals(tstate); +} + +PyObject * +_PyEval_GetGlobalsFromRunningMain(PyThreadState *tstate) +{ + if (!_PyInterpreterState_IsRunningMain(tstate->interp)) { + return NULL; + } + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + Py_XDECREF(mod); + return NULL; + } + PyObject *globals = PyModule_GetDict(mod); // borrowed + Py_DECREF(mod); + return globals; +} + +static PyObject * +get_globals_builtins(PyObject *globals) +{ + PyObject *builtins = NULL; + if (PyDict_Check(globals)) { + if (PyDict_GetItemRef(globals, &_Py_ID(__builtins__), &builtins) < 0) { + return NULL; + } + } + else { + if (PyMapping_GetOptionalItem( + globals, &_Py_ID(__builtins__), &builtins) < 0) + { + return NULL; + } + } + return builtins; +} + +static int +set_globals_builtins(PyObject *globals, PyObject *builtins) +{ + if (PyDict_Check(globals)) { + if (PyDict_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + else { + if (PyObject_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + return 0; +} + +int +_PyEval_EnsureBuiltins(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyEval_GetBuiltins(); // borrowed + if (builtins == NULL) { + assert(_PyErr_Occurred(tstate)); + return -1; + } + Py_INCREF(builtins); + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + +int +_PyEval_EnsureBuiltinsWithModule(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyImport_ImportModuleLevel("builtins", NULL, NULL, NULL, 0); + if (builtins == NULL) { + return -1; + } + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + PyObject* PyEval_GetFrameLocals(void) { diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index 6d0b93eb82ac1e..c3c76ae8d9a289 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -722,16 +722,26 @@ _PyFunction_FromXIData(_PyXIData_t *xidata) return NULL; } // Create a new function. + // For stateless functions (no globals) we use __main__ as __globals__, + // just like we do for builtins like exec(). assert(PyCode_Check(code)); - PyObject *globals = PyDict_New(); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *globals = _PyEval_GetGlobalsFromRunningMain(tstate); // borrowed if (globals == NULL) { - Py_DECREF(code); - return NULL; + if (_PyErr_Occurred(tstate)) { + Py_DECREF(code); + return NULL; + } + globals = PyDict_New(); + if (globals == NULL) { + Py_DECREF(code); + return NULL; + } } - PyThreadState *tstate = _PyThreadState_GET(); - if (PyDict_SetItem(globals, &_Py_ID(__builtins__), - tstate->interp->builtins) < 0) - { + else { + Py_INCREF(globals); + } + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { Py_DECREF(code); Py_DECREF(globals); return NULL; diff --git a/Python/import.c b/Python/import.c index 184dede335dfd6..73b94d0dd2a1b1 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3960,25 +3960,28 @@ PyImport_Import(PyObject *module_name) } /* Get the builtins from current globals */ - globals = PyEval_GetGlobals(); + globals = PyEval_GetGlobals(); // borrowed if (globals != NULL) { Py_INCREF(globals); + // XXX Use _PyEval_EnsureBuiltins()? builtins = PyObject_GetItem(globals, &_Py_ID(__builtins__)); if (builtins == NULL) { // XXX Fall back to interp->builtins or sys.modules['builtins']? goto err; } } + else if (_PyErr_Occurred(tstate)) { + goto err; + } else { /* No globals -- use standard builtins, and fake globals */ - builtins = PyImport_ImportModuleLevel("builtins", - NULL, NULL, NULL, 0); - if (builtins == NULL) { + globals = PyDict_New(); + if (globals == NULL) { goto err; } - globals = Py_BuildValue("{OO}", &_Py_ID(__builtins__), builtins); - if (globals == NULL) + if (_PyEval_EnsureBuiltinsWithModule(tstate, globals, &builtins) < 0) { goto err; + } } /* Get the __import__ function from the builtins */ From 7c685894cd9c2c669f09fad31365e3ad6c0c3861 Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Tue, 17 Jun 2025 01:35:59 +0200 Subject: [PATCH 582/989] gh-119132: Update 'Using Python on macOS' documentation. (#135591) Remove `experimental` qualification for free-threading in the document text. Note that images included in the document will be updated later in the release cycle. --- Doc/using/mac.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index d44f55b5cc9581..f88f3c2e0785e4 100644 --- a/Doc/using/mac.rst +++ b/Doc/using/mac.rst @@ -76,7 +76,7 @@ display. For most uses, the standard set of installation operations is appropria By pressing the **Customize** button, you can choose to omit or select certain package components of the installer. Click on each package name to see a description of what it installs. -To also install support for the optional experimental free-threaded feature, +To also install support for the optional free-threaded feature, see :ref:`install-freethreaded-macos`. .. image:: mac_installer_05_custom_install.png @@ -252,20 +252,20 @@ Advanced Topics Installing Free-threaded Binaries --------------------------------- -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. +.. versionadded:: 3.13 The ``python.org`` :ref:`Python for macOS ` installer package can optionally install an additional build of -Python |version| that supports :pep:`703`, the experimental free-threading feature +Python |version| that supports :pep:`703`, the free-threading feature (running with the :term:`global interpreter lock` disabled). Check the release page on ``python.org`` for possible updated information. -Because this feature is still considered experimental, the support for it +The free-threaded mode is working and continues to be improved, but +there is some additional overhead in single-threaded workloads compared +to the regular build. Additionally, third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. +Therefore, the support for free-threading is not installed by default. It is packaged as a separate install option, available by clicking the **Customize** button on the **Installation Type** step of the installer as described above. @@ -277,8 +277,8 @@ a separate :file:`PythonT.framework` will also be installed alongside the normal :file:`Python.framework` in :file:`/Library/Frameworks`. This configuration allows a free-threaded Python |version| build to co-exist on your system with a traditional (GIL only) Python |version| build with -minimal risk while installing or testing. This installation layout is itself -experimental and is subject to change in future releases. +minimal risk while installing or testing. This installation layout may +change in future releases. Known cautions and limitations: @@ -409,13 +409,13 @@ You can then test that both installer builds are now available with something li $ # test that the free-threaded interpreter was installed if the Unix Command Tools package was enabled $ /usr/local/bin/python\ |version|\ t -VV - Python \ |version|\ .0b2 experimental free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] $ # and the traditional interpreter $ /usr/local/bin/python\ |version|\ -VV Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] $ # test that they are also available without the prefix if /usr/local/bin is on $PATH $ python\ |version|\ t -VV - Python \ |version|\ .0b2 experimental free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] $ python\ |version|\ -VV Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] From 5b3a826888ed3b598dc4bcdc6a48d5f6ae231f8d Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 17 Jun 2025 13:06:48 +0800 Subject: [PATCH 583/989] gh-135489: Show verbose output for failing tests during PGO profiling step with `--enable-optimizations` (#135512) --- Lib/test/libregrtest/single.py | 2 +- .../next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py index 57d7b649d2ef63..958a915626ad24 100644 --- a/Lib/test/libregrtest/single.py +++ b/Lib/test/libregrtest/single.py @@ -283,7 +283,7 @@ def _runtest(result: TestResult, runtests: RunTests) -> None: try: setup_tests(runtests) - if output_on_failure: + if output_on_failure or runtests.pgo: support.verbose = True stream = io.StringIO() diff --git a/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst b/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst new file mode 100644 index 00000000000000..2c9ecc5182935b --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst @@ -0,0 +1 @@ +Show verbose output for failing tests during PGO profiling step with --enable-optimizations. From c51f241c97c5bcc8ae6830842db5b00f76d6a592 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 17 Jun 2025 09:02:35 +0300 Subject: [PATCH 584/989] gh-123299: Add PEP 779 to What's New in Python 3.14 (#135555) --- Doc/whatsnew/3.14.rst | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 895446e2721ca5..f0a87a9ada7bab 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -82,9 +82,10 @@ and improvements in user-friendliness and correctness. .. PEP-sized items next. +* :ref:`PEP 779: Free-threaded Python is officially supported ` * :ref:`PEP 649 and 749: deferred evaluation of annotations ` -* :ref:`PEP 734: Multiple Interpreters in the Stdlib ` -* :ref:`PEP 741: Python Configuration C API ` +* :ref:`PEP 734: Multiple interpreters in the stdlib ` +* :ref:`PEP 741: Python configuration C API ` * :ref:`PEP 750: Template strings ` * :ref:`PEP 758: Allow except and except* expressions without parentheses ` * :ref:`PEP 761: Discontinuation of PGP signatures ` @@ -124,9 +125,35 @@ of Python. See :ref:`below ` for details. New features ============ +.. _whatsnew314-pep779: + +PEP 779: Free-threaded Python is officially supported +----------------------------------------------------- + +The free-threaded build of Python is now supported and no longer experimental. +This is the start of phase II where free-threaded Python is officially supported +but still optional. + +We are confident that the project is on the right path, and we appreciate the +continued dedication from everyone working to make free-threading ready for +broader adoption across the Python community. + +With these recommendations and the acceptance of this PEP, we as the Python +developer community should broadly advertise that free-threading is a supported +Python build option now and into the future, and that it will not be removed +without a proper deprecation schedule. + +Any decision to transition to phase III, with free-threading as the default or +sole build of Python is still undecided, and dependent on many factors both +within CPython itself and the community. This decision is for the future. + +.. seealso:: + :pep:`779` and its `acceptance + `__. + .. _whatsnew314-pep734: -PEP 734: Multiple Interpreters in the Stdlib +PEP 734: Multiple interpreters in the stdlib -------------------------------------------- The CPython runtime supports running multiple copies of Python in the @@ -392,7 +419,7 @@ As can be seen, the API is similar to the APIs of the :mod:`!lzma` and :mod:`!bz2` modules. (Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, -Victor Stinner, and Rogdham in :gh:`132983`) +Victor Stinner, and Rogdham in :gh:`132983`.) .. seealso:: :pep:`784`. @@ -727,7 +754,7 @@ Improved error messages .. _whatsnew314-pep741: -PEP 741: Python Configuration C API +PEP 741: Python configuration C API ----------------------------------- Add a :ref:`PyInitConfig C API ` to configure the Python @@ -1266,7 +1293,7 @@ configparser to :meth:`configparser.ConfigParser.write` keys containing delimiters or beginning with the section header pattern will raise a :class:`configparser.InvalidWriteError`. - (Contributed by Jacob Lincoln in :gh:`129270`) + (Contributed by Jacob Lincoln in :gh:`129270`.) contextvars ----------- @@ -2755,8 +2782,8 @@ New features * Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier superclass identification, which attempts to resolve the `type checking issue - `__ mentioned in :pep:`630` - (:gh:`124153`). + `__ mentioned in :pep:`630`. + (Contributed in :gh:`124153`.) * Add :c:func:`PyUnicode_Equal` function to the limited C API: test if two strings are equal. From 0d582def34babca7417ece8a9e4e16cc2a752d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 17 Jun 2025 10:05:04 +0200 Subject: [PATCH 585/989] gh-134632: Fix `build-details.json` to use `INCLUDEPY` path (#134633) * gh-134632: Fix `build-details.json` to use `INCLUDEPY` path Fix ``build-details.json`` generation to use ``INCLUDEPY``, in order to reference the ``pythonX.Y`` subdirectory of the include directory, as required in :pep:`739`, instead of the top-level include directory. * test_build_details: Add tests for the c_api section * test_build_details: Expect pkgconfig for CPython unconditionally --- Lib/test/test_build_details.py | 7 +++++++ .../Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst | 3 +++ Tools/build/generate-build-details.py | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst diff --git a/Lib/test/test_build_details.py b/Lib/test/test_build_details.py index 05ce163a337881..33ade161fb5058 100644 --- a/Lib/test/test_build_details.py +++ b/Lib/test/test_build_details.py @@ -123,6 +123,13 @@ def test_base_interpreter(self): self.assertEqual(os.path.realpath(value), os.path.realpath(sys.executable)) + @needs_installed_python + def test_c_api(self): + value = self.key('c_api') + self.assertTrue(os.path.exists(os.path.join(value['headers'], 'Python.h'))) + version = sysconfig.get_config_var('VERSION') + self.assertTrue(os.path.exists(os.path.join(value['pkgconfig_path'], f'python-{version}.pc'))) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst b/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst new file mode 100644 index 00000000000000..f41c8744b8aaa0 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst @@ -0,0 +1,3 @@ +Fixed ``build-details.json`` generation to use ``INCLUDEPY``, in order to +reference the ``pythonX.Y`` subdirectory of the include directory, as +required in :pep:`739`, instead of the top-level include directory. diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py index 87e262065ec87b..8cd23e2f54f529 100644 --- a/Tools/build/generate-build-details.py +++ b/Tools/build/generate-build-details.py @@ -75,7 +75,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: PY3LIBRARY = sysconfig.get_config_var('PY3LIBRARY') LIBPYTHON = sysconfig.get_config_var('LIBPYTHON') LIBPC = sysconfig.get_config_var('LIBPC') - INCLUDEDIR = sysconfig.get_config_var('INCLUDEDIR') + INCLUDEPY = sysconfig.get_config_var('INCLUDEPY') if os.name == 'posix': # On POSIX, LIBRARY is always the static library, while LDLIBRARY is the @@ -123,7 +123,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: if has_static_library: data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY) - data['c_api']['headers'] = INCLUDEDIR + data['c_api']['headers'] = INCLUDEPY if LIBPC: data['c_api']['pkgconfig_path'] = LIBPC From acc20a83f4d93682e91c4992c785eaf7e3d0c69c Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Tue, 17 Jun 2025 07:51:23 -0400 Subject: [PATCH 586/989] gh-134262: Catch both URLError and ConnectionError in retries (#135365) --- PCbuild/get_external.py | 7 ++++--- Tools/build/generate_sbom.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index 8c1155c74a642c..a78aa6a23041ad 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -5,8 +5,9 @@ import pathlib import sys import time +import urllib.error +import urllib.request import zipfile -from urllib.request import urlretrieve def retrieve_with_retries(download_location, output_path, reporthook, @@ -14,12 +15,12 @@ def retrieve_with_retries(download_location, output_path, reporthook, """Download a file with exponential backoff retry and save to disk.""" for attempt in range(max_retries + 1): try: - resp = urlretrieve( + resp = urllib.request.urlretrieve( download_location, output_path, reporthook=reporthook, ) - except ConnectionError as ex: + except (urllib.error.URLError, ConnectionError) as ex: if attempt == max_retries: msg = f"Download from {download_location} failed." raise OSError(msg) from ex diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index df52f8de762a01..968397728b2f67 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -172,7 +172,7 @@ def download_with_retries(download_location: str, for attempt in range(max_retries + 1): try: resp = urllib.request.urlopen(download_location) - except urllib.error.URLError as ex: + except (urllib.error.URLError, ConnectionError) as ex: if attempt == max_retries: msg = f"Download from {download_location} failed." raise OSError(msg) from ex From a9e66a7c506680263b39bc8c150ddc5e72213c45 Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 17 Jun 2025 20:11:09 +0800 Subject: [PATCH 587/989] gh-132815: Add support for JUMP_BACKWARD in specialization stats (#135606) --- .../next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst | 1 + Python/specialize.c | 1 + 2 files changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst diff --git a/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst b/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst new file mode 100644 index 00000000000000..5b7485ce2d6a9f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst @@ -0,0 +1 @@ +Fix test__opcode: add ``JUMP_BACKWARD`` to specialization stats. diff --git a/Python/specialize.c b/Python/specialize.c index 92f79d39d55208..fe8d04cf3442f1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -118,6 +118,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); + err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward"); err += add_stat_dict(stats, CALL, "call"); err += add_stat_dict(stats, CALL_KW, "call_kw"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); From 8dd8b5c2f0785675b9282b719256341448d49967 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 17 Jun 2025 13:43:09 +0100 Subject: [PATCH 588/989] GH-135379: Support limited scalar replacement for replicated uops in the JIT code generator. (GH-135563) * Use it to support efficient specializations of COPY and SWAP in the JIT. --- Include/internal/pycore_uop_ids.h | 347 +++++++++--------- Include/internal/pycore_uop_metadata.h | 37 +- Python/bytecodes.c | 6 +- Python/executor_cases.c.h | 61 ++- Python/generated_cases.c.h | 2 - Python/optimizer.c | 4 +- Tools/cases_generator/analyzer.py | 38 +- Tools/cases_generator/parsing.py | 8 +- .../cases_generator/uop_metadata_generator.py | 8 +- 9 files changed, 313 insertions(+), 198 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 2b845527cf2ed5..8211c5d056535e 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -86,86 +86,89 @@ extern "C" { #define _CONTAINS_OP_DICT 362 #define _CONTAINS_OP_SET 363 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY COPY +#define _COPY 364 +#define _COPY_1 365 +#define _COPY_2 366 +#define _COPY_3 367 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 364 +#define _CREATE_INIT_FRAME 368 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 365 +#define _DEOPT 369 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 366 -#define _DO_CALL_FUNCTION_EX 367 -#define _DO_CALL_KW 368 +#define _DO_CALL 370 +#define _DO_CALL_FUNCTION_EX 371 +#define _DO_CALL_KW 372 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 369 +#define _ERROR_POP_N 373 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 370 -#define _EXPAND_METHOD_KW 371 -#define _FATAL_ERROR 372 +#define _EXPAND_METHOD 374 +#define _EXPAND_METHOD_KW 375 +#define _FATAL_ERROR 376 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 373 -#define _FOR_ITER_GEN_FRAME 374 -#define _FOR_ITER_TIER_TWO 375 +#define _FOR_ITER 377 +#define _FOR_ITER_GEN_FRAME 378 +#define _FOR_ITER_TIER_TWO 379 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 376 -#define _GUARD_CALLABLE_ISINSTANCE 377 -#define _GUARD_CALLABLE_LEN 378 -#define _GUARD_CALLABLE_LIST_APPEND 379 -#define _GUARD_CALLABLE_STR_1 380 -#define _GUARD_CALLABLE_TUPLE_1 381 -#define _GUARD_CALLABLE_TYPE_1 382 -#define _GUARD_DORV_NO_DICT 383 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384 -#define _GUARD_GLOBALS_VERSION 385 -#define _GUARD_IS_FALSE_POP 386 -#define _GUARD_IS_NONE_POP 387 -#define _GUARD_IS_NOT_NONE_POP 388 -#define _GUARD_IS_TRUE_POP 389 -#define _GUARD_KEYS_VERSION 390 -#define _GUARD_NOS_DICT 391 -#define _GUARD_NOS_FLOAT 392 -#define _GUARD_NOS_INT 393 -#define _GUARD_NOS_LIST 394 -#define _GUARD_NOS_NOT_NULL 395 -#define _GUARD_NOS_NULL 396 -#define _GUARD_NOS_TUPLE 397 -#define _GUARD_NOS_UNICODE 398 -#define _GUARD_NOT_EXHAUSTED_LIST 399 -#define _GUARD_NOT_EXHAUSTED_RANGE 400 -#define _GUARD_NOT_EXHAUSTED_TUPLE 401 -#define _GUARD_THIRD_NULL 402 -#define _GUARD_TOS_ANY_SET 403 -#define _GUARD_TOS_DICT 404 -#define _GUARD_TOS_FLOAT 405 -#define _GUARD_TOS_INT 406 -#define _GUARD_TOS_LIST 407 -#define _GUARD_TOS_SLICE 408 -#define _GUARD_TOS_TUPLE 409 -#define _GUARD_TOS_UNICODE 410 -#define _GUARD_TYPE_VERSION 411 -#define _GUARD_TYPE_VERSION_AND_LOCK 412 +#define _GUARD_BINARY_OP_EXTEND 380 +#define _GUARD_CALLABLE_ISINSTANCE 381 +#define _GUARD_CALLABLE_LEN 382 +#define _GUARD_CALLABLE_LIST_APPEND 383 +#define _GUARD_CALLABLE_STR_1 384 +#define _GUARD_CALLABLE_TUPLE_1 385 +#define _GUARD_CALLABLE_TYPE_1 386 +#define _GUARD_DORV_NO_DICT 387 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388 +#define _GUARD_GLOBALS_VERSION 389 +#define _GUARD_IS_FALSE_POP 390 +#define _GUARD_IS_NONE_POP 391 +#define _GUARD_IS_NOT_NONE_POP 392 +#define _GUARD_IS_TRUE_POP 393 +#define _GUARD_KEYS_VERSION 394 +#define _GUARD_NOS_DICT 395 +#define _GUARD_NOS_FLOAT 396 +#define _GUARD_NOS_INT 397 +#define _GUARD_NOS_LIST 398 +#define _GUARD_NOS_NOT_NULL 399 +#define _GUARD_NOS_NULL 400 +#define _GUARD_NOS_TUPLE 401 +#define _GUARD_NOS_UNICODE 402 +#define _GUARD_NOT_EXHAUSTED_LIST 403 +#define _GUARD_NOT_EXHAUSTED_RANGE 404 +#define _GUARD_NOT_EXHAUSTED_TUPLE 405 +#define _GUARD_THIRD_NULL 406 +#define _GUARD_TOS_ANY_SET 407 +#define _GUARD_TOS_DICT 408 +#define _GUARD_TOS_FLOAT 409 +#define _GUARD_TOS_INT 410 +#define _GUARD_TOS_LIST 411 +#define _GUARD_TOS_SLICE 412 +#define _GUARD_TOS_TUPLE 413 +#define _GUARD_TOS_UNICODE 414 +#define _GUARD_TYPE_VERSION 415 +#define _GUARD_TYPE_VERSION_AND_LOCK 416 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413 -#define _INIT_CALL_PY_EXACT_ARGS 414 -#define _INIT_CALL_PY_EXACT_ARGS_0 415 -#define _INIT_CALL_PY_EXACT_ARGS_1 416 -#define _INIT_CALL_PY_EXACT_ARGS_2 417 -#define _INIT_CALL_PY_EXACT_ARGS_3 418 -#define _INIT_CALL_PY_EXACT_ARGS_4 419 -#define _INSERT_NULL 420 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417 +#define _INIT_CALL_PY_EXACT_ARGS 418 +#define _INIT_CALL_PY_EXACT_ARGS_0 419 +#define _INIT_CALL_PY_EXACT_ARGS_1 420 +#define _INIT_CALL_PY_EXACT_ARGS_2 421 +#define _INIT_CALL_PY_EXACT_ARGS_3 422 +#define _INIT_CALL_PY_EXACT_ARGS_4 423 +#define _INSERT_NULL 424 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -175,171 +178,173 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 421 +#define _IS_NONE 425 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 422 -#define _ITER_CHECK_RANGE 423 -#define _ITER_CHECK_TUPLE 424 -#define _ITER_JUMP_LIST 425 -#define _ITER_JUMP_RANGE 426 -#define _ITER_JUMP_TUPLE 427 -#define _ITER_NEXT_LIST 428 -#define _ITER_NEXT_LIST_TIER_TWO 429 -#define _ITER_NEXT_RANGE 430 -#define _ITER_NEXT_TUPLE 431 -#define _JUMP_TO_TOP 432 +#define _ITER_CHECK_LIST 426 +#define _ITER_CHECK_RANGE 427 +#define _ITER_CHECK_TUPLE 428 +#define _ITER_JUMP_LIST 429 +#define _ITER_JUMP_RANGE 430 +#define _ITER_JUMP_TUPLE 431 +#define _ITER_NEXT_LIST 432 +#define _ITER_NEXT_LIST_TIER_TWO 433 +#define _ITER_NEXT_RANGE 434 +#define _ITER_NEXT_TUPLE 435 +#define _JUMP_TO_TOP 436 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 433 -#define _LOAD_ATTR_CLASS 434 +#define _LOAD_ATTR 437 +#define _LOAD_ATTR_CLASS 438 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 435 -#define _LOAD_ATTR_METHOD_LAZY_DICT 436 -#define _LOAD_ATTR_METHOD_NO_DICT 437 -#define _LOAD_ATTR_METHOD_WITH_VALUES 438 -#define _LOAD_ATTR_MODULE 439 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441 -#define _LOAD_ATTR_PROPERTY_FRAME 442 -#define _LOAD_ATTR_SLOT 443 -#define _LOAD_ATTR_WITH_HINT 444 +#define _LOAD_ATTR_INSTANCE_VALUE 439 +#define _LOAD_ATTR_METHOD_LAZY_DICT 440 +#define _LOAD_ATTR_METHOD_NO_DICT 441 +#define _LOAD_ATTR_METHOD_WITH_VALUES 442 +#define _LOAD_ATTR_MODULE 443 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445 +#define _LOAD_ATTR_PROPERTY_FRAME 446 +#define _LOAD_ATTR_SLOT 447 +#define _LOAD_ATTR_WITH_HINT 448 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 445 +#define _LOAD_BYTECODE 449 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 446 -#define _LOAD_CONST_INLINE_BORROW 447 -#define _LOAD_CONST_UNDER_INLINE 448 -#define _LOAD_CONST_UNDER_INLINE_BORROW 449 +#define _LOAD_CONST_INLINE 450 +#define _LOAD_CONST_INLINE_BORROW 451 +#define _LOAD_CONST_UNDER_INLINE 452 +#define _LOAD_CONST_UNDER_INLINE_BORROW 453 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 450 -#define _LOAD_FAST_0 451 -#define _LOAD_FAST_1 452 -#define _LOAD_FAST_2 453 -#define _LOAD_FAST_3 454 -#define _LOAD_FAST_4 455 -#define _LOAD_FAST_5 456 -#define _LOAD_FAST_6 457 -#define _LOAD_FAST_7 458 +#define _LOAD_FAST 454 +#define _LOAD_FAST_0 455 +#define _LOAD_FAST_1 456 +#define _LOAD_FAST_2 457 +#define _LOAD_FAST_3 458 +#define _LOAD_FAST_4 459 +#define _LOAD_FAST_5 460 +#define _LOAD_FAST_6 461 +#define _LOAD_FAST_7 462 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 459 -#define _LOAD_FAST_BORROW_0 460 -#define _LOAD_FAST_BORROW_1 461 -#define _LOAD_FAST_BORROW_2 462 -#define _LOAD_FAST_BORROW_3 463 -#define _LOAD_FAST_BORROW_4 464 -#define _LOAD_FAST_BORROW_5 465 -#define _LOAD_FAST_BORROW_6 466 -#define _LOAD_FAST_BORROW_7 467 +#define _LOAD_FAST_BORROW 463 +#define _LOAD_FAST_BORROW_0 464 +#define _LOAD_FAST_BORROW_1 465 +#define _LOAD_FAST_BORROW_2 466 +#define _LOAD_FAST_BORROW_3 467 +#define _LOAD_FAST_BORROW_4 468 +#define _LOAD_FAST_BORROW_5 469 +#define _LOAD_FAST_BORROW_6 470 +#define _LOAD_FAST_BORROW_7 471 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 468 -#define _LOAD_GLOBAL_BUILTINS 469 -#define _LOAD_GLOBAL_MODULE 470 +#define _LOAD_GLOBAL 472 +#define _LOAD_GLOBAL_BUILTINS 473 +#define _LOAD_GLOBAL_MODULE 474 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 471 -#define _LOAD_SMALL_INT_0 472 -#define _LOAD_SMALL_INT_1 473 -#define _LOAD_SMALL_INT_2 474 -#define _LOAD_SMALL_INT_3 475 -#define _LOAD_SPECIAL 476 +#define _LOAD_SMALL_INT 475 +#define _LOAD_SMALL_INT_0 476 +#define _LOAD_SMALL_INT_1 477 +#define _LOAD_SMALL_INT_2 478 +#define _LOAD_SMALL_INT_3 479 +#define _LOAD_SPECIAL 480 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 477 +#define _MAKE_CALLARGS_A_TUPLE 481 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 478 +#define _MAKE_WARM 482 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 479 -#define _MAYBE_EXPAND_METHOD_KW 480 -#define _MONITOR_CALL 481 -#define _MONITOR_CALL_KW 482 -#define _MONITOR_JUMP_BACKWARD 483 -#define _MONITOR_RESUME 484 +#define _MAYBE_EXPAND_METHOD 483 +#define _MAYBE_EXPAND_METHOD_KW 484 +#define _MONITOR_CALL 485 +#define _MONITOR_CALL_KW 486 +#define _MONITOR_JUMP_BACKWARD 487 +#define _MONITOR_RESUME 488 #define _NOP NOP -#define _POP_CALL 485 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 -#define _POP_CALL_ONE 487 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 -#define _POP_CALL_TWO 489 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 +#define _POP_CALL 489 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490 +#define _POP_CALL_ONE 491 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492 +#define _POP_CALL_TWO 493 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 491 -#define _POP_JUMP_IF_TRUE 492 +#define _POP_JUMP_IF_FALSE 495 +#define _POP_JUMP_IF_TRUE 496 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 493 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 -#define _POP_TWO 495 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 +#define _POP_TOP_LOAD_CONST_INLINE 497 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498 +#define _POP_TWO 499 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 497 +#define _PUSH_FRAME 501 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 498 -#define _PY_FRAME_GENERAL 499 -#define _PY_FRAME_KW 500 -#define _QUICKEN_RESUME 501 -#define _REPLACE_WITH_TRUE 502 +#define _PUSH_NULL_CONDITIONAL 502 +#define _PY_FRAME_GENERAL 503 +#define _PY_FRAME_KW 504 +#define _QUICKEN_RESUME 505 +#define _REPLACE_WITH_TRUE 506 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 503 -#define _SEND 504 -#define _SEND_GEN_FRAME 505 +#define _SAVE_RETURN_OFFSET 507 +#define _SEND 508 +#define _SEND_GEN_FRAME 509 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 506 -#define _STORE_ATTR 507 -#define _STORE_ATTR_INSTANCE_VALUE 508 -#define _STORE_ATTR_SLOT 509 -#define _STORE_ATTR_WITH_HINT 510 +#define _START_EXECUTOR 510 +#define _STORE_ATTR 511 +#define _STORE_ATTR_INSTANCE_VALUE 512 +#define _STORE_ATTR_SLOT 513 +#define _STORE_ATTR_WITH_HINT 514 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 511 -#define _STORE_FAST_0 512 -#define _STORE_FAST_1 513 -#define _STORE_FAST_2 514 -#define _STORE_FAST_3 515 -#define _STORE_FAST_4 516 -#define _STORE_FAST_5 517 -#define _STORE_FAST_6 518 -#define _STORE_FAST_7 519 +#define _STORE_FAST 515 +#define _STORE_FAST_0 516 +#define _STORE_FAST_1 517 +#define _STORE_FAST_2 518 +#define _STORE_FAST_3 519 +#define _STORE_FAST_4 520 +#define _STORE_FAST_5 521 +#define _STORE_FAST_6 522 +#define _STORE_FAST_7 523 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 520 -#define _STORE_SUBSCR 521 -#define _STORE_SUBSCR_DICT 522 -#define _STORE_SUBSCR_LIST_INT 523 -#define _SWAP SWAP -#define _TIER2_RESUME_CHECK 524 -#define _TO_BOOL 525 +#define _STORE_SLICE 524 +#define _STORE_SUBSCR 525 +#define _STORE_SUBSCR_DICT 526 +#define _STORE_SUBSCR_LIST_INT 527 +#define _SWAP 528 +#define _SWAP_2 529 +#define _SWAP_3 530 +#define _TIER2_RESUME_CHECK 531 +#define _TO_BOOL 532 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 526 +#define _TO_BOOL_LIST 533 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 527 +#define _TO_BOOL_STR 534 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 528 -#define _UNPACK_SEQUENCE_LIST 529 -#define _UNPACK_SEQUENCE_TUPLE 530 -#define _UNPACK_SEQUENCE_TWO_TUPLE 531 +#define _UNPACK_SEQUENCE 535 +#define _UNPACK_SEQUENCE_LIST 536 +#define _UNPACK_SEQUENCE_TUPLE 537 +#define _UNPACK_SEQUENCE_TWO_TUPLE 538 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 531 +#define MAX_UOP_ID 538 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index cd36023c25cbb4..fad87d4b586e64 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -12,7 +12,8 @@ extern "C" { #include #include "pycore_uop_ids.h" extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1]; -extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1]; +typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange; +extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1]; extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; extern int _PyUop_num_popped(int opcode, int oparg); @@ -288,8 +289,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_COPY_1] = HAS_PURE_FLAG, + [_COPY_2] = HAS_PURE_FLAG, + [_COPY_3] = HAS_PURE_FLAG, [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_SWAP_2] = HAS_PURE_FLAG, + [_SWAP_3] = HAS_PURE_FLAG, [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, @@ -323,12 +329,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, }; -const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { - [_LOAD_FAST] = 8, - [_LOAD_FAST_BORROW] = 8, - [_LOAD_SMALL_INT] = 4, - [_STORE_FAST] = 8, - [_INIT_CALL_PY_EXACT_ARGS] = 5, +const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { + [_LOAD_FAST] = { 0, 8 }, + [_LOAD_FAST_BORROW] = { 0, 8 }, + [_LOAD_SMALL_INT] = { 0, 4 }, + [_STORE_FAST] = { 0, 8 }, + [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 }, + [_COPY] = { 1, 4 }, + [_SWAP] = { 2, 4 }, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -408,6 +416,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CONTAINS_OP_SET] = "_CONTAINS_OP_SET", [_CONVERT_VALUE] = "_CONVERT_VALUE", [_COPY] = "_COPY", + [_COPY_1] = "_COPY_1", + [_COPY_2] = "_COPY_2", + [_COPY_3] = "_COPY_3", [_COPY_FREE_VARS] = "_COPY_FREE_VARS", [_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME", [_DELETE_ATTR] = "_DELETE_ATTR", @@ -617,6 +628,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_SWAP] = "_SWAP", + [_SWAP_2] = "_SWAP_2", + [_SWAP_3] = "_SWAP_3", [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK", [_TO_BOOL] = "_TO_BOOL", [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL", @@ -1176,10 +1189,20 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _FORMAT_WITH_SPEC: return 2; + case _COPY_1: + return 0; + case _COPY_2: + return 0; + case _COPY_3: + return 0; case _COPY: return 0; case _BINARY_OP: return 2; + case _SWAP_2: + return 0; + case _SWAP_3: + return 0; case _SWAP: return 0; case _GUARD_IS_TRUE_POP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 971e97a5784692..27a04766cc8dd8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4946,8 +4946,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { - assert(oparg > 0); + pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { top = PyStackRef_DUP(bottom); } @@ -4980,12 +4979,11 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; - pure inst(SWAP, (bottom, unused[oparg-2], top -- + pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); } inst(INSTRUMENTED_LINE, ( -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index dbfb2391bf0623..74c78e4d1f5a69 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6763,12 +6763,44 @@ break; } + case _COPY_1: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-1]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_2: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-2]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_3: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-3]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _COPY: { _PyStackRef bottom; _PyStackRef top; oparg = CURRENT_OPARG(); bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -6808,6 +6840,32 @@ break; } + case _SWAP_2: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-2] = bottom; + stack_pointer[-1] = top; + break; + } + + case _SWAP_3: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-3]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-3] = bottom; + stack_pointer[-1] = top; + break; + } + case _SWAP: { _PyStackRef top; _PyStackRef bottom; @@ -6817,7 +6875,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2cf027c539b992..4fc1d5266d0a87 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5228,7 +5228,6 @@ _PyStackRef bottom; _PyStackRef top; bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -11568,7 +11567,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; DISPATCH(); diff --git a/Python/optimizer.c b/Python/optimizer.c index dde3dd8ebe745a..8d01d605ef4a2a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1292,8 +1292,8 @@ uop_optimize( for (int pc = 0; pc < length; pc++) { int opcode = buffer[pc].opcode; int oparg = buffer[pc].oparg; - if (oparg < _PyUop_Replication[opcode]) { - buffer[pc].opcode = opcode + oparg + 1; + if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) { + buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start; assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0); } else if (is_terminator(&buffer[pc])) { diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index ca6d0301f3572d..c6a9fbcad8891f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -180,7 +180,7 @@ class Uop: properties: Properties _size: int = -1 implicitly_created: bool = False - replicated = 0 + replicated = range(0) replicates: "Uop | None" = None # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro instruction_size: int | None = None @@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties: needs_prev=variable_used(op, "prev_instr"), ) +def expand(items: list[StackItem], oparg: int) -> list[StackItem]: + # Only replace array item with scalar if no more than one item is an array + index = -1 + for i, item in enumerate(items): + if "oparg" in item.size: + if index >= 0: + return items + index = i + if index < 0: + return items + try: + count = int(eval(items[index].size.replace("oparg", str(oparg)))) + except ValueError: + return items + return items[:index] + [ + StackItem(items[index].name + f"_{i}", "", items[index].peek, items[index].used) for i in range(count) + ] + items[index+1:] + +def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect: + stack.inputs = expand(stack.inputs, oparg) + stack.outputs = expand(stack.outputs, oparg) + return stack def make_uop( name: str, @@ -887,20 +909,26 @@ def make_uop( ) for anno in op.annotations: if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + text = anno[10:-1] + start, stop = text.split(":") + result.replicated = range(int(start), int(stop)) break else: return result - for oparg in range(result.replicated): + for oparg in result.replicated: name_x = name + "_" + str(oparg) properties = compute_properties(op) properties.oparg = False - properties.const_oparg = oparg + stack = analyze_stack(op) + if not variable_used(op, "oparg"): + stack = scalarize_stack(stack, oparg) + else: + properties.const_oparg = oparg rep = Uop( name=name_x, context=op.context, annotations=op.annotations, - stack=analyze_stack(op), + stack=stack, caches=analyze_caches(inputs), local_stores=find_variable_stores(op), body=op.block, diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index a6dac48187525d..c7fe0d162ac6e4 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None: while anno := self.expect(lx.ANNOTATION): if anno.text == "replicate": self.require(lx.LPAREN) - times = self.require(lx.NUMBER) + stop = self.require(lx.NUMBER) + start_text = "0" + if self.expect(lx.COLON): + start_text = stop.text + stop = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"replicate({start_text}:{stop.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 6f995e5c46bfcf..1cc23837a72dea 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -24,7 +24,8 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n") - out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n") + out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n") + out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n") out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n") out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") @@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n") out.emit("};\n\n") - out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n") + out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n") for uop in analysis.uops.values(): if uop.replicated: - out.emit(f"[{uop.name}] = {uop.replicated},\n") + assert(uop.replicated.step == 1) + out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n") out.emit("};\n\n") out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n") From fba5dded6df3c2b1943557afef89a5cb418f65a2 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Tue, 17 Jun 2025 23:25:53 +0800 Subject: [PATCH 589/989] gh-134584: Decref elimination for float ops in the JIT (GH-134588) This PR adds a PyJitRef API to the JIT's optimizer that mimics the _PyStackRef API. This allows it to track references and their stack lifetimes properly. Thus opening up the doorway to refcount elimination in the JIT. --- Include/internal/pycore_optimizer.h | 107 +++- Include/internal/pycore_uop_ids.h | 475 +++++++------- Include/internal/pycore_uop_metadata.h | 12 + Lib/test/test_capi/test_opt.py | 20 +- Lib/test/test_generated_cases.py | 12 +- ...-05-23-14-54-07.gh-issue-134584.y-WDjf.rst | 1 + Python/bytecodes.c | 46 ++ Python/executor_cases.c.h | 81 +++ Python/optimizer_analysis.c | 15 +- Python/optimizer_bytecodes.c | 66 +- Python/optimizer_cases.c.h | 594 ++++++++++-------- Python/optimizer_symbols.c | 332 +++++----- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/optimizer_generator.py | 4 +- 14 files changed, 1027 insertions(+), 740 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index d3674726997f6a..b27341cb1dbbeb 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" +#include "pycore_stackref.h" #include @@ -220,15 +221,58 @@ typedef union _jit_opt_symbol { } JitOptSymbol; +// This mimics the _PyStackRef API +typedef union { + uintptr_t bits; +} JitOptRef; + +#define REF_IS_BORROWED 1 + +#define JIT_BITS_TO_PTR_MASKED(REF) ((JitOptSymbol *)(((REF).bits) & (~REF_IS_BORROWED))) + +static inline JitOptSymbol * +PyJitRef_Unwrap(JitOptRef ref) +{ + return JIT_BITS_TO_PTR_MASKED(ref); +} + +bool _Py_uop_symbol_is_immortal(JitOptSymbol *sym); + + +static inline JitOptRef +PyJitRef_Wrap(JitOptSymbol *sym) +{ + return (JitOptRef){.bits=(uintptr_t)sym}; +} + +static inline JitOptRef +PyJitRef_Borrow(JitOptRef ref) +{ + return (JitOptRef){ .bits = ref.bits | REF_IS_BORROWED }; +} + +static const JitOptRef PyJitRef_NULL = {.bits = REF_IS_BORROWED}; + +static inline bool +PyJitRef_IsNull(JitOptRef ref) +{ + return ref.bits == PyJitRef_NULL.bits; +} + +static inline int +PyJitRef_IsBorrowed(JitOptRef ref) +{ + return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED; +} struct _Py_UOpsAbstractFrame { // Max stacklen int stack_len; int locals_len; - JitOptSymbol **stack_pointer; - JitOptSymbol **stack; - JitOptSymbol **locals; + JitOptRef *stack_pointer; + JitOptRef *stack; + JitOptRef *locals; }; typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; @@ -251,37 +295,36 @@ typedef struct _JitOptContext { // Arena for the symbolic types. ty_arena t_arena; - JitOptSymbol **n_consumed; - JitOptSymbol **limit; - JitOptSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; + JitOptRef *n_consumed; + JitOptRef *limit; + JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; } JitOptContext; -extern bool _Py_uop_sym_is_null(JitOptSymbol *sym); -extern bool _Py_uop_sym_is_not_null(JitOptSymbol *sym); -extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym); -extern PyObject *_Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym); -extern JitOptSymbol *_Py_uop_sym_new_unknown(JitOptContext *ctx); -extern JitOptSymbol *_Py_uop_sym_new_not_null(JitOptContext *ctx); -extern JitOptSymbol *_Py_uop_sym_new_type( +extern bool _Py_uop_sym_is_null(JitOptRef sym); +extern bool _Py_uop_sym_is_not_null(JitOptRef sym); +extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym); +extern PyObject *_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef sym); +extern JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx); +extern JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx); +extern JitOptRef _Py_uop_sym_new_type( JitOptContext *ctx, PyTypeObject *typ); -extern JitOptSymbol *_Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); -extern JitOptSymbol *_Py_uop_sym_new_null(JitOptContext *ctx); -extern bool _Py_uop_sym_has_type(JitOptSymbol *sym); -extern bool _Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version); -extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym); -extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym); -extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version); -extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val); -extern bool _Py_uop_sym_is_bottom(JitOptSymbol *sym); -extern int _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym); -extern PyTypeObject *_Py_uop_sym_get_type(JitOptSymbol *sym); -extern bool _Py_uop_sym_is_immortal(JitOptSymbol *sym); -extern JitOptSymbol *_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args); -extern JitOptSymbol *_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item); -extern int _Py_uop_sym_tuple_length(JitOptSymbol *sym); -extern JitOptSymbol *_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy); +extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); +extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); +extern bool _Py_uop_sym_has_type(JitOptRef sym); +extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ); +extern bool _Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version); +extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef sym); +extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef sym); +extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef sym, PyTypeObject *typ); +extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef sym, unsigned int version); +extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef sym, PyObject *const_val); +extern bool _Py_uop_sym_is_bottom(JitOptRef sym); +extern int _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef sym); +extern PyTypeObject *_Py_uop_sym_get_type(JitOptRef sym); +extern JitOptRef _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args); +extern JitOptRef _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef sym, int item); +extern int _Py_uop_sym_tuple_length(JitOptRef sym); +extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value, bool truthy); extern void _Py_uop_abstractcontext_init(JitOptContext *ctx); extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx); @@ -290,7 +333,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( JitOptContext *ctx, PyCodeObject *co, int curr_stackentries, - JitOptSymbol **args, + JitOptRef *args, int arg_len); extern int _Py_uop_frame_pop(JitOptContext *ctx); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8211c5d056535e..f67626ec5c6ac5 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -13,22 +13,25 @@ extern "C" { #define _SET_IP 301 #define _BINARY_OP 302 #define _BINARY_OP_ADD_FLOAT 303 -#define _BINARY_OP_ADD_INT 304 -#define _BINARY_OP_ADD_UNICODE 305 -#define _BINARY_OP_EXTEND 306 -#define _BINARY_OP_INPLACE_ADD_UNICODE 307 -#define _BINARY_OP_MULTIPLY_FLOAT 308 -#define _BINARY_OP_MULTIPLY_INT 309 -#define _BINARY_OP_SUBSCR_CHECK_FUNC 310 -#define _BINARY_OP_SUBSCR_DICT 311 -#define _BINARY_OP_SUBSCR_INIT_CALL 312 -#define _BINARY_OP_SUBSCR_LIST_INT 313 -#define _BINARY_OP_SUBSCR_LIST_SLICE 314 -#define _BINARY_OP_SUBSCR_STR_INT 315 -#define _BINARY_OP_SUBSCR_TUPLE_INT 316 -#define _BINARY_OP_SUBTRACT_FLOAT 317 -#define _BINARY_OP_SUBTRACT_INT 318 -#define _BINARY_SLICE 319 +#define _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS 304 +#define _BINARY_OP_ADD_INT 305 +#define _BINARY_OP_ADD_UNICODE 306 +#define _BINARY_OP_EXTEND 307 +#define _BINARY_OP_INPLACE_ADD_UNICODE 308 +#define _BINARY_OP_MULTIPLY_FLOAT 309 +#define _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS 310 +#define _BINARY_OP_MULTIPLY_INT 311 +#define _BINARY_OP_SUBSCR_CHECK_FUNC 312 +#define _BINARY_OP_SUBSCR_DICT 313 +#define _BINARY_OP_SUBSCR_INIT_CALL 314 +#define _BINARY_OP_SUBSCR_LIST_INT 315 +#define _BINARY_OP_SUBSCR_LIST_SLICE 316 +#define _BINARY_OP_SUBSCR_STR_INT 317 +#define _BINARY_OP_SUBSCR_TUPLE_INT 318 +#define _BINARY_OP_SUBTRACT_FLOAT 319 +#define _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS 320 +#define _BINARY_OP_SUBTRACT_INT 321 +#define _BINARY_SLICE 322 #define _BUILD_INTERPOLATION BUILD_INTERPOLATION #define _BUILD_LIST BUILD_LIST #define _BUILD_MAP BUILD_MAP @@ -37,138 +40,138 @@ extern "C" { #define _BUILD_STRING BUILD_STRING #define _BUILD_TEMPLATE BUILD_TEMPLATE #define _BUILD_TUPLE BUILD_TUPLE -#define _CALL_BUILTIN_CLASS 320 -#define _CALL_BUILTIN_FAST 321 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 322 -#define _CALL_BUILTIN_O 323 +#define _CALL_BUILTIN_CLASS 323 +#define _CALL_BUILTIN_FAST 324 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 325 +#define _CALL_BUILTIN_O 326 #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 -#define _CALL_ISINSTANCE 324 -#define _CALL_KW_NON_PY 325 -#define _CALL_LEN 326 -#define _CALL_LIST_APPEND 327 -#define _CALL_METHOD_DESCRIPTOR_FAST 328 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 329 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 330 -#define _CALL_METHOD_DESCRIPTOR_O 331 -#define _CALL_NON_PY_GENERAL 332 -#define _CALL_STR_1 333 -#define _CALL_TUPLE_1 334 -#define _CALL_TYPE_1 335 -#define _CHECK_AND_ALLOCATE_OBJECT 336 -#define _CHECK_ATTR_CLASS 337 -#define _CHECK_ATTR_METHOD_LAZY_DICT 338 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 339 +#define _CALL_ISINSTANCE 327 +#define _CALL_KW_NON_PY 328 +#define _CALL_LEN 329 +#define _CALL_LIST_APPEND 330 +#define _CALL_METHOD_DESCRIPTOR_FAST 331 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 332 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 333 +#define _CALL_METHOD_DESCRIPTOR_O 334 +#define _CALL_NON_PY_GENERAL 335 +#define _CALL_STR_1 336 +#define _CALL_TUPLE_1 337 +#define _CALL_TYPE_1 338 +#define _CHECK_AND_ALLOCATE_OBJECT 339 +#define _CHECK_ATTR_CLASS 340 +#define _CHECK_ATTR_METHOD_LAZY_DICT 341 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 342 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 340 -#define _CHECK_FUNCTION_EXACT_ARGS 341 -#define _CHECK_FUNCTION_VERSION 342 -#define _CHECK_FUNCTION_VERSION_INLINE 343 -#define _CHECK_FUNCTION_VERSION_KW 344 -#define _CHECK_IS_NOT_PY_CALLABLE 345 -#define _CHECK_IS_NOT_PY_CALLABLE_KW 346 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 347 -#define _CHECK_METHOD_VERSION 348 -#define _CHECK_METHOD_VERSION_KW 349 -#define _CHECK_PEP_523 350 -#define _CHECK_PERIODIC 351 -#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 352 -#define _CHECK_RECURSION_REMAINING 353 -#define _CHECK_STACK_SPACE 354 -#define _CHECK_STACK_SPACE_OPERAND 355 -#define _CHECK_VALIDITY 356 -#define _COMPARE_OP 357 -#define _COMPARE_OP_FLOAT 358 -#define _COMPARE_OP_INT 359 -#define _COMPARE_OP_STR 360 -#define _CONTAINS_OP 361 -#define _CONTAINS_OP_DICT 362 -#define _CONTAINS_OP_SET 363 +#define _CHECK_FUNCTION 343 +#define _CHECK_FUNCTION_EXACT_ARGS 344 +#define _CHECK_FUNCTION_VERSION 345 +#define _CHECK_FUNCTION_VERSION_INLINE 346 +#define _CHECK_FUNCTION_VERSION_KW 347 +#define _CHECK_IS_NOT_PY_CALLABLE 348 +#define _CHECK_IS_NOT_PY_CALLABLE_KW 349 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 350 +#define _CHECK_METHOD_VERSION 351 +#define _CHECK_METHOD_VERSION_KW 352 +#define _CHECK_PEP_523 353 +#define _CHECK_PERIODIC 354 +#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 355 +#define _CHECK_RECURSION_REMAINING 356 +#define _CHECK_STACK_SPACE 357 +#define _CHECK_STACK_SPACE_OPERAND 358 +#define _CHECK_VALIDITY 359 +#define _COMPARE_OP 360 +#define _COMPARE_OP_FLOAT 361 +#define _COMPARE_OP_INT 362 +#define _COMPARE_OP_STR 363 +#define _CONTAINS_OP 364 +#define _CONTAINS_OP_DICT 365 +#define _CONTAINS_OP_SET 366 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 364 -#define _COPY_1 365 -#define _COPY_2 366 -#define _COPY_3 367 +#define _COPY 367 +#define _COPY_1 368 +#define _COPY_2 369 +#define _COPY_3 370 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 368 +#define _CREATE_INIT_FRAME 371 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 369 +#define _DEOPT 372 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 370 -#define _DO_CALL_FUNCTION_EX 371 -#define _DO_CALL_KW 372 +#define _DO_CALL 373 +#define _DO_CALL_FUNCTION_EX 374 +#define _DO_CALL_KW 375 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 373 +#define _ERROR_POP_N 376 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 374 -#define _EXPAND_METHOD_KW 375 -#define _FATAL_ERROR 376 +#define _EXPAND_METHOD 377 +#define _EXPAND_METHOD_KW 378 +#define _FATAL_ERROR 379 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 377 -#define _FOR_ITER_GEN_FRAME 378 -#define _FOR_ITER_TIER_TWO 379 +#define _FOR_ITER 380 +#define _FOR_ITER_GEN_FRAME 381 +#define _FOR_ITER_TIER_TWO 382 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 380 -#define _GUARD_CALLABLE_ISINSTANCE 381 -#define _GUARD_CALLABLE_LEN 382 -#define _GUARD_CALLABLE_LIST_APPEND 383 -#define _GUARD_CALLABLE_STR_1 384 -#define _GUARD_CALLABLE_TUPLE_1 385 -#define _GUARD_CALLABLE_TYPE_1 386 -#define _GUARD_DORV_NO_DICT 387 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388 -#define _GUARD_GLOBALS_VERSION 389 -#define _GUARD_IS_FALSE_POP 390 -#define _GUARD_IS_NONE_POP 391 -#define _GUARD_IS_NOT_NONE_POP 392 -#define _GUARD_IS_TRUE_POP 393 -#define _GUARD_KEYS_VERSION 394 -#define _GUARD_NOS_DICT 395 -#define _GUARD_NOS_FLOAT 396 -#define _GUARD_NOS_INT 397 -#define _GUARD_NOS_LIST 398 -#define _GUARD_NOS_NOT_NULL 399 -#define _GUARD_NOS_NULL 400 -#define _GUARD_NOS_TUPLE 401 -#define _GUARD_NOS_UNICODE 402 -#define _GUARD_NOT_EXHAUSTED_LIST 403 -#define _GUARD_NOT_EXHAUSTED_RANGE 404 -#define _GUARD_NOT_EXHAUSTED_TUPLE 405 -#define _GUARD_THIRD_NULL 406 -#define _GUARD_TOS_ANY_SET 407 -#define _GUARD_TOS_DICT 408 -#define _GUARD_TOS_FLOAT 409 -#define _GUARD_TOS_INT 410 -#define _GUARD_TOS_LIST 411 -#define _GUARD_TOS_SLICE 412 -#define _GUARD_TOS_TUPLE 413 -#define _GUARD_TOS_UNICODE 414 -#define _GUARD_TYPE_VERSION 415 -#define _GUARD_TYPE_VERSION_AND_LOCK 416 +#define _GUARD_BINARY_OP_EXTEND 383 +#define _GUARD_CALLABLE_ISINSTANCE 384 +#define _GUARD_CALLABLE_LEN 385 +#define _GUARD_CALLABLE_LIST_APPEND 386 +#define _GUARD_CALLABLE_STR_1 387 +#define _GUARD_CALLABLE_TUPLE_1 388 +#define _GUARD_CALLABLE_TYPE_1 389 +#define _GUARD_DORV_NO_DICT 390 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 391 +#define _GUARD_GLOBALS_VERSION 392 +#define _GUARD_IS_FALSE_POP 393 +#define _GUARD_IS_NONE_POP 394 +#define _GUARD_IS_NOT_NONE_POP 395 +#define _GUARD_IS_TRUE_POP 396 +#define _GUARD_KEYS_VERSION 397 +#define _GUARD_NOS_DICT 398 +#define _GUARD_NOS_FLOAT 399 +#define _GUARD_NOS_INT 400 +#define _GUARD_NOS_LIST 401 +#define _GUARD_NOS_NOT_NULL 402 +#define _GUARD_NOS_NULL 403 +#define _GUARD_NOS_TUPLE 404 +#define _GUARD_NOS_UNICODE 405 +#define _GUARD_NOT_EXHAUSTED_LIST 406 +#define _GUARD_NOT_EXHAUSTED_RANGE 407 +#define _GUARD_NOT_EXHAUSTED_TUPLE 408 +#define _GUARD_THIRD_NULL 409 +#define _GUARD_TOS_ANY_SET 410 +#define _GUARD_TOS_DICT 411 +#define _GUARD_TOS_FLOAT 412 +#define _GUARD_TOS_INT 413 +#define _GUARD_TOS_LIST 414 +#define _GUARD_TOS_SLICE 415 +#define _GUARD_TOS_TUPLE 416 +#define _GUARD_TOS_UNICODE 417 +#define _GUARD_TYPE_VERSION 418 +#define _GUARD_TYPE_VERSION_AND_LOCK 419 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417 -#define _INIT_CALL_PY_EXACT_ARGS 418 -#define _INIT_CALL_PY_EXACT_ARGS_0 419 -#define _INIT_CALL_PY_EXACT_ARGS_1 420 -#define _INIT_CALL_PY_EXACT_ARGS_2 421 -#define _INIT_CALL_PY_EXACT_ARGS_3 422 -#define _INIT_CALL_PY_EXACT_ARGS_4 423 -#define _INSERT_NULL 424 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 420 +#define _INIT_CALL_PY_EXACT_ARGS 421 +#define _INIT_CALL_PY_EXACT_ARGS_0 422 +#define _INIT_CALL_PY_EXACT_ARGS_1 423 +#define _INIT_CALL_PY_EXACT_ARGS_2 424 +#define _INIT_CALL_PY_EXACT_ARGS_3 425 +#define _INIT_CALL_PY_EXACT_ARGS_4 426 +#define _INSERT_NULL 427 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -178,173 +181,173 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 425 +#define _IS_NONE 428 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 426 -#define _ITER_CHECK_RANGE 427 -#define _ITER_CHECK_TUPLE 428 -#define _ITER_JUMP_LIST 429 -#define _ITER_JUMP_RANGE 430 -#define _ITER_JUMP_TUPLE 431 -#define _ITER_NEXT_LIST 432 -#define _ITER_NEXT_LIST_TIER_TWO 433 -#define _ITER_NEXT_RANGE 434 -#define _ITER_NEXT_TUPLE 435 -#define _JUMP_TO_TOP 436 +#define _ITER_CHECK_LIST 429 +#define _ITER_CHECK_RANGE 430 +#define _ITER_CHECK_TUPLE 431 +#define _ITER_JUMP_LIST 432 +#define _ITER_JUMP_RANGE 433 +#define _ITER_JUMP_TUPLE 434 +#define _ITER_NEXT_LIST 435 +#define _ITER_NEXT_LIST_TIER_TWO 436 +#define _ITER_NEXT_RANGE 437 +#define _ITER_NEXT_TUPLE 438 +#define _JUMP_TO_TOP 439 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 437 -#define _LOAD_ATTR_CLASS 438 +#define _LOAD_ATTR 440 +#define _LOAD_ATTR_CLASS 441 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 439 -#define _LOAD_ATTR_METHOD_LAZY_DICT 440 -#define _LOAD_ATTR_METHOD_NO_DICT 441 -#define _LOAD_ATTR_METHOD_WITH_VALUES 442 -#define _LOAD_ATTR_MODULE 443 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445 -#define _LOAD_ATTR_PROPERTY_FRAME 446 -#define _LOAD_ATTR_SLOT 447 -#define _LOAD_ATTR_WITH_HINT 448 +#define _LOAD_ATTR_INSTANCE_VALUE 442 +#define _LOAD_ATTR_METHOD_LAZY_DICT 443 +#define _LOAD_ATTR_METHOD_NO_DICT 444 +#define _LOAD_ATTR_METHOD_WITH_VALUES 445 +#define _LOAD_ATTR_MODULE 446 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 447 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 448 +#define _LOAD_ATTR_PROPERTY_FRAME 449 +#define _LOAD_ATTR_SLOT 450 +#define _LOAD_ATTR_WITH_HINT 451 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 449 +#define _LOAD_BYTECODE 452 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 450 -#define _LOAD_CONST_INLINE_BORROW 451 -#define _LOAD_CONST_UNDER_INLINE 452 -#define _LOAD_CONST_UNDER_INLINE_BORROW 453 +#define _LOAD_CONST_INLINE 453 +#define _LOAD_CONST_INLINE_BORROW 454 +#define _LOAD_CONST_UNDER_INLINE 455 +#define _LOAD_CONST_UNDER_INLINE_BORROW 456 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 454 -#define _LOAD_FAST_0 455 -#define _LOAD_FAST_1 456 -#define _LOAD_FAST_2 457 -#define _LOAD_FAST_3 458 -#define _LOAD_FAST_4 459 -#define _LOAD_FAST_5 460 -#define _LOAD_FAST_6 461 -#define _LOAD_FAST_7 462 +#define _LOAD_FAST 457 +#define _LOAD_FAST_0 458 +#define _LOAD_FAST_1 459 +#define _LOAD_FAST_2 460 +#define _LOAD_FAST_3 461 +#define _LOAD_FAST_4 462 +#define _LOAD_FAST_5 463 +#define _LOAD_FAST_6 464 +#define _LOAD_FAST_7 465 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 463 -#define _LOAD_FAST_BORROW_0 464 -#define _LOAD_FAST_BORROW_1 465 -#define _LOAD_FAST_BORROW_2 466 -#define _LOAD_FAST_BORROW_3 467 -#define _LOAD_FAST_BORROW_4 468 -#define _LOAD_FAST_BORROW_5 469 -#define _LOAD_FAST_BORROW_6 470 -#define _LOAD_FAST_BORROW_7 471 +#define _LOAD_FAST_BORROW 466 +#define _LOAD_FAST_BORROW_0 467 +#define _LOAD_FAST_BORROW_1 468 +#define _LOAD_FAST_BORROW_2 469 +#define _LOAD_FAST_BORROW_3 470 +#define _LOAD_FAST_BORROW_4 471 +#define _LOAD_FAST_BORROW_5 472 +#define _LOAD_FAST_BORROW_6 473 +#define _LOAD_FAST_BORROW_7 474 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 472 -#define _LOAD_GLOBAL_BUILTINS 473 -#define _LOAD_GLOBAL_MODULE 474 +#define _LOAD_GLOBAL 475 +#define _LOAD_GLOBAL_BUILTINS 476 +#define _LOAD_GLOBAL_MODULE 477 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 475 -#define _LOAD_SMALL_INT_0 476 -#define _LOAD_SMALL_INT_1 477 -#define _LOAD_SMALL_INT_2 478 -#define _LOAD_SMALL_INT_3 479 -#define _LOAD_SPECIAL 480 +#define _LOAD_SMALL_INT 478 +#define _LOAD_SMALL_INT_0 479 +#define _LOAD_SMALL_INT_1 480 +#define _LOAD_SMALL_INT_2 481 +#define _LOAD_SMALL_INT_3 482 +#define _LOAD_SPECIAL 483 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 481 +#define _MAKE_CALLARGS_A_TUPLE 484 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 482 +#define _MAKE_WARM 485 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 483 -#define _MAYBE_EXPAND_METHOD_KW 484 -#define _MONITOR_CALL 485 -#define _MONITOR_CALL_KW 486 -#define _MONITOR_JUMP_BACKWARD 487 -#define _MONITOR_RESUME 488 +#define _MAYBE_EXPAND_METHOD 486 +#define _MAYBE_EXPAND_METHOD_KW 487 +#define _MONITOR_CALL 488 +#define _MONITOR_CALL_KW 489 +#define _MONITOR_JUMP_BACKWARD 490 +#define _MONITOR_RESUME 491 #define _NOP NOP -#define _POP_CALL 489 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490 -#define _POP_CALL_ONE 491 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492 -#define _POP_CALL_TWO 493 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494 +#define _POP_CALL 492 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 493 +#define _POP_CALL_ONE 494 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 495 +#define _POP_CALL_TWO 496 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 497 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 495 -#define _POP_JUMP_IF_TRUE 496 +#define _POP_JUMP_IF_FALSE 498 +#define _POP_JUMP_IF_TRUE 499 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 497 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498 -#define _POP_TWO 499 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500 +#define _POP_TOP_LOAD_CONST_INLINE 500 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 501 +#define _POP_TWO 502 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 503 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 501 +#define _PUSH_FRAME 504 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 502 -#define _PY_FRAME_GENERAL 503 -#define _PY_FRAME_KW 504 -#define _QUICKEN_RESUME 505 -#define _REPLACE_WITH_TRUE 506 +#define _PUSH_NULL_CONDITIONAL 505 +#define _PY_FRAME_GENERAL 506 +#define _PY_FRAME_KW 507 +#define _QUICKEN_RESUME 508 +#define _REPLACE_WITH_TRUE 509 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 507 -#define _SEND 508 -#define _SEND_GEN_FRAME 509 +#define _SAVE_RETURN_OFFSET 510 +#define _SEND 511 +#define _SEND_GEN_FRAME 512 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 510 -#define _STORE_ATTR 511 -#define _STORE_ATTR_INSTANCE_VALUE 512 -#define _STORE_ATTR_SLOT 513 -#define _STORE_ATTR_WITH_HINT 514 +#define _START_EXECUTOR 513 +#define _STORE_ATTR 514 +#define _STORE_ATTR_INSTANCE_VALUE 515 +#define _STORE_ATTR_SLOT 516 +#define _STORE_ATTR_WITH_HINT 517 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 515 -#define _STORE_FAST_0 516 -#define _STORE_FAST_1 517 -#define _STORE_FAST_2 518 -#define _STORE_FAST_3 519 -#define _STORE_FAST_4 520 -#define _STORE_FAST_5 521 -#define _STORE_FAST_6 522 -#define _STORE_FAST_7 523 +#define _STORE_FAST 518 +#define _STORE_FAST_0 519 +#define _STORE_FAST_1 520 +#define _STORE_FAST_2 521 +#define _STORE_FAST_3 522 +#define _STORE_FAST_4 523 +#define _STORE_FAST_5 524 +#define _STORE_FAST_6 525 +#define _STORE_FAST_7 526 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 524 -#define _STORE_SUBSCR 525 -#define _STORE_SUBSCR_DICT 526 -#define _STORE_SUBSCR_LIST_INT 527 -#define _SWAP 528 -#define _SWAP_2 529 -#define _SWAP_3 530 -#define _TIER2_RESUME_CHECK 531 -#define _TO_BOOL 532 +#define _STORE_SLICE 527 +#define _STORE_SUBSCR 528 +#define _STORE_SUBSCR_DICT 529 +#define _STORE_SUBSCR_LIST_INT 530 +#define _SWAP 531 +#define _SWAP_2 532 +#define _SWAP_3 533 +#define _TIER2_RESUME_CHECK 534 +#define _TO_BOOL 535 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 533 +#define _TO_BOOL_LIST 536 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 534 +#define _TO_BOOL_STR 537 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 535 -#define _UNPACK_SEQUENCE_LIST 536 -#define _UNPACK_SEQUENCE_TUPLE 537 -#define _UNPACK_SEQUENCE_TWO_TUPLE 538 +#define _UNPACK_SEQUENCE 538 +#define _UNPACK_SEQUENCE_LIST 539 +#define _UNPACK_SEQUENCE_TUPLE 540 +#define _UNPACK_SEQUENCE_TWO_TUPLE 541 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 538 +#define MAX_UOP_ID 541 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fad87d4b586e64..40914c578c9038 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -94,6 +94,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, @@ -342,11 +345,13 @@ const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP] = "_BINARY_OP", [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", + [_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS", [_BINARY_OP_ADD_INT] = "_BINARY_OP_ADD_INT", [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE", [_BINARY_OP_EXTEND] = "_BINARY_OP_EXTEND", [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE", [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", + [_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS", [_BINARY_OP_MULTIPLY_INT] = "_BINARY_OP_MULTIPLY_INT", [_BINARY_OP_SUBSCR_CHECK_FUNC] = "_BINARY_OP_SUBSCR_CHECK_FUNC", [_BINARY_OP_SUBSCR_DICT] = "_BINARY_OP_SUBSCR_DICT", @@ -356,6 +361,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP_SUBSCR_STR_INT] = "_BINARY_OP_SUBSCR_STR_INT", [_BINARY_OP_SUBSCR_TUPLE_INT] = "_BINARY_OP_SUBSCR_TUPLE_INT", [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT", + [_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS", [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT", [_BINARY_SLICE] = "_BINARY_SLICE", [_BUILD_INTERPOLATION] = "_BUILD_INTERPOLATION", @@ -799,6 +805,12 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_OP_SUBTRACT_FLOAT: return 2; + case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: + return 2; + case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: + return 2; + case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: + return 2; case _BINARY_OP_ADD_UNICODE: return 2; case _BINARY_OP_INPLACE_ADD_UNICODE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 3fc2cb33795a5f..41833836e720ba 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -678,7 +678,7 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertIn("_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS", uops) def test_float_subtract_constant_propagation(self): def testfunc(n): @@ -700,7 +700,7 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertIn("_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS", uops) def test_float_multiply_constant_propagation(self): def testfunc(n): @@ -722,7 +722,7 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertIn("_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS", uops) def test_add_unicode_propagation(self): def testfunc(n): @@ -2262,6 +2262,20 @@ def f(n): self.assertNotIn("_LOAD_ATTR_METHOD_NO_DICT", uops) self.assertNotIn("_LOAD_ATTR_METHOD_LAZY_DICT", uops) + def test_float_op_refcount_elimination(self): + def testfunc(args): + a, b, n = args + c = 0.0 + for _ in range(n): + c += a + b + return c + + res, ex = self._run_with_optimizer(testfunc, (0.1, 0.1, TIER2_THRESHOLD)) + self.assertAlmostEqual(res, TIER2_THRESHOLD * (0.1 + 0.1)) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS", uops) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 6411e4318b6c3a..9e0fd1218f2534 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1976,8 +1976,8 @@ def test_overridden_abstract_args(self): """ output = """ case OP: { - JitOptSymbol *arg1; - JitOptSymbol *out; + JitOptRef arg1; + JitOptRef out; arg1 = stack_pointer[-1]; out = EGGS(arg1); stack_pointer[-1] = out; @@ -1985,7 +1985,7 @@ def test_overridden_abstract_args(self): } case OP2: { - JitOptSymbol *out; + JitOptRef out; out = sym_new_not_null(ctx); stack_pointer[-1] = out; break; @@ -2010,14 +2010,14 @@ def test_no_overridden_case(self): """ output = """ case OP: { - JitOptSymbol *out; + JitOptRef out; out = sym_new_not_null(ctx); stack_pointer[-1] = out; break; } case OP2: { - JitOptSymbol *out; + JitOptRef out; out = NULL; stack_pointer[-1] = out; break; @@ -2151,7 +2151,7 @@ def test_validate_uop_unused_output(self): """ output = """ case OP: { - JitOptSymbol *foo; + JitOptRef foo; foo = NULL; stack_pointer[0] = foo; stack_pointer += 1; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst new file mode 100644 index 00000000000000..5f9e1553ae7ca5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst @@ -0,0 +1 @@ +Add a reference count elimination pass to the JIT compiler. Patch by Ken Jin. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 27a04766cc8dd8..2dd1d27747a693 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -687,6 +687,52 @@ dummy_func( ERROR_IF(PyStackRef_IsNull(res)); } + + pure op(_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + + pure op(_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + + pure op(_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + macro(BINARY_OP_MULTIPLY_FLOAT) = _GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_MULTIPLY_FLOAT; macro(BINARY_OP_ADD_FLOAT) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 74c78e4d1f5a69..3b58e7fab5553d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1063,6 +1063,87 @@ break; } + case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if (PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if (PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if (PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _BINARY_OP_ADD_UNICODE: { _PyStackRef right; _PyStackRef left; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6a7df233819b9c..de337f637163ba 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -347,8 +347,8 @@ static int optimize_to_bool( _PyUOpInstruction *this_instr, JitOptContext *ctx, - JitOptSymbol *value, - JitOptSymbol **result_ptr) + JitOptRef value, + JitOptRef *result_ptr) { if (sym_matches_type(value, &PyBool_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -375,7 +375,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } } -static JitOptSymbol * +static JitOptRef lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, PyTypeObject *type, PyObject *name, uint16_t immortal, uint16_t mortal) @@ -440,6 +440,13 @@ get_code_with_logging(_PyUOpInstruction *op) return co; } +// TODO (gh-134584) generate most of this table automatically +const uint16_t op_without_decref_inputs[MAX_UOP_ID + 1] = { + [_BINARY_OP_MULTIPLY_FLOAT] = _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, + [_BINARY_OP_ADD_FLOAT] = _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS, + [_BINARY_OP_SUBTRACT_FLOAT] = _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS, +}; + /* 1 for success, 0 for not ready, cannot error at the moment. */ static int optimize_uops( @@ -477,7 +484,7 @@ optimize_uops( int oparg = this_instr->oparg; opcode = this_instr->opcode; - JitOptSymbol **stack_pointer = ctx->frame->stack_pointer; + JitOptRef *stack_pointer = ctx->frame->stack_pointer; #ifdef Py_DEBUG if (get_lltrace() >= 3) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 5a9a3a943a7b02..07b05fc3323ece 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -87,12 +87,12 @@ dummy_func(void) { } op(_LOAD_FAST_BORROW, (-- value)) { - value = GETLOCAL(oparg); + value = PyJitRef_Borrow(GETLOCAL(oparg)); } op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); - JitOptSymbol *temp = sym_new_null(ctx); + JitOptRef temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; } @@ -251,6 +251,10 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyFloat_Type); } + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { @@ -271,6 +275,10 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyFloat_Type); } + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { @@ -291,6 +299,10 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyFloat_Type); } + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } } op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { @@ -310,7 +322,7 @@ dummy_func(void) { } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { - JitOptSymbol *res; + JitOptRef res; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -329,7 +341,7 @@ dummy_func(void) { } op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) { - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; } @@ -488,7 +500,7 @@ dummy_func(void) { op(_LOAD_CONST, (-- value)) { PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); } op(_LOAD_SMALL_INT, (-- value)) { @@ -496,35 +508,35 @@ dummy_func(void) { assert(val); assert(_Py_IsImmortal(val)); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { @@ -533,7 +545,7 @@ dummy_func(void) { } op(_SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { - JitOptSymbol *temp = bottom; + JitOptRef temp = bottom; bottom = top; top = temp; assert(oparg >= 2); @@ -547,7 +559,7 @@ dummy_func(void) { op(_LOAD_ATTR_MODULE, (dict_version/2, index/1, owner -- attr)) { (void)dict_version; (void)index; - attr = NULL; + attr = PyJitRef_NULL; if (sym_is_const(ctx, owner)) { PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner); if (PyModule_CheckExact(mod)) { @@ -561,7 +573,7 @@ dummy_func(void) { } } } - if (attr == NULL) { + if (PyJitRef_IsNull(attr)) { /* No conversion made. We don't know what `attr` is. */ attr = sym_new_not_null(ctx); } @@ -654,7 +666,7 @@ dummy_func(void) { op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { (void)fget; - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; } @@ -712,7 +724,7 @@ dummy_func(void) { } - assert(self_or_null != NULL); + assert(!PyJitRef_IsNull(self_or_null)); assert(args != NULL); if (sym_is_not_null(self_or_null)) { // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM @@ -721,9 +733,9 @@ dummy_func(void) { } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount)); } else { - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } } @@ -742,11 +754,11 @@ dummy_func(void) { break; } - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; } @@ -758,12 +770,12 @@ dummy_func(void) { } op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) { - init_frame = NULL; + init_frame = PyJitRef_NULL; ctx->done = true; } op(_RETURN_VALUE, (retval -- res)) { - JitOptSymbol *temp = retval; + JitOptRef temp = retval; DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; @@ -825,13 +837,13 @@ dummy_func(void) { } op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame)) { - gen_frame = NULL; + gen_frame = PyJitRef_NULL; /* We are about to hit the end of the trace */ ctx->done = true; } op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame)) { - gen_frame = NULL; + gen_frame = PyJitRef_NULL; // We are about to hit the end of the trace: ctx->done = true; } @@ -851,7 +863,7 @@ dummy_func(void) { op(_PUSH_FRAME, (new_frame -- )) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = (_Py_UOpsAbstractFrame *)new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8c4f0399c75a73..354331ef618f67 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -26,7 +26,7 @@ /* _MONITOR_RESUME is not a viable micro-op for tier 2 */ case _LOAD_FAST_CHECK: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); if (sym_is_null(value)) { ctx->done = true; @@ -38,7 +38,7 @@ } case _LOAD_FAST: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); stack_pointer[0] = value; stack_pointer += 1; @@ -47,8 +47,8 @@ } case _LOAD_FAST_BORROW: { - JitOptSymbol *value; - value = GETLOCAL(oparg); + JitOptRef value; + value = PyJitRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -56,9 +56,9 @@ } case _LOAD_FAST_AND_CLEAR: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); - JitOptSymbol *temp = sym_new_null(ctx); + JitOptRef temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; stack_pointer[0] = value; stack_pointer += 1; @@ -67,10 +67,10 @@ } case _LOAD_CONST: { - JitOptSymbol *value; + JitOptRef value; PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -78,12 +78,12 @@ } case _LOAD_SMALL_INT: { - JitOptSymbol *value; + JitOptRef value; PyObject *val = PyLong_FromLong(oparg); assert(val); assert(_Py_IsImmortal(val)); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -91,7 +91,7 @@ } case _STORE_FAST: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; stack_pointer += -1; @@ -112,7 +112,7 @@ } case _PUSH_NULL: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -133,7 +133,7 @@ } case _END_SEND: { - JitOptSymbol *val; + JitOptRef val; val = sym_new_not_null(ctx); stack_pointer[-2] = val; stack_pointer += -1; @@ -142,15 +142,15 @@ } case _UNARY_NEGATIVE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _UNARY_NOT: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); @@ -159,8 +159,8 @@ } case _TO_BOOL: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -171,7 +171,7 @@ } case _TO_BOOL_BOOL: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &value); if (!already_bool) { @@ -183,8 +183,8 @@ } case _TO_BOOL_INT: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -196,7 +196,7 @@ } case _GUARD_NOS_LIST: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyList_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -206,7 +206,7 @@ } case _GUARD_TOS_LIST: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyList_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -220,8 +220,8 @@ } case _TO_BOOL_LIST: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -232,8 +232,8 @@ } case _TO_BOOL_NONE: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -245,7 +245,7 @@ } case _GUARD_NOS_UNICODE: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyUnicode_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -255,7 +255,7 @@ } case _GUARD_TOS_UNICODE: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; if (sym_matches_type(value, &PyUnicode_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -265,8 +265,8 @@ } case _TO_BOOL_STR: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -277,7 +277,7 @@ } case _REPLACE_WITH_TRUE: { - JitOptSymbol *res; + JitOptRef res; REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True); res = sym_new_const(ctx, Py_True); stack_pointer[-1] = res; @@ -285,8 +285,8 @@ } case _UNARY_INVERT: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; if (sym_matches_type(value, &PyLong_Type)) { res = sym_new_type(ctx, &PyLong_Type); @@ -299,7 +299,7 @@ } case _GUARD_NOS_INT: { - JitOptSymbol *left; + JitOptRef left; left = stack_pointer[-2]; if (sym_matches_type(left, &PyLong_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -309,7 +309,7 @@ } case _GUARD_TOS_INT: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; if (sym_matches_type(value, &PyLong_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -319,7 +319,7 @@ } case _BINARY_OP_MULTIPLY_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyLong_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -328,7 +328,7 @@ } case _BINARY_OP_ADD_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyLong_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -337,7 +337,7 @@ } case _BINARY_OP_SUBTRACT_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyLong_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -346,7 +346,7 @@ } case _GUARD_NOS_FLOAT: { - JitOptSymbol *left; + JitOptRef left; left = stack_pointer[-2]; if (sym_matches_type(left, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -356,7 +356,7 @@ } case _GUARD_TOS_FLOAT: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; if (sym_matches_type(value, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -366,9 +366,9 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -390,14 +390,17 @@ res = sym_new_type(ctx, &PyFloat_Type); stack_pointer += -1; } + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } stack_pointer[-1] = res; break; } case _BINARY_OP_ADD_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -419,14 +422,17 @@ res = sym_new_type(ctx, &PyFloat_Type); stack_pointer += -1; } + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } stack_pointer[-1] = res; break; } case _BINARY_OP_SUBTRACT_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -448,14 +454,44 @@ res = sym_new_type(ctx, &PyFloat_Type); stack_pointer += -1; } + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); + } stack_pointer[-1] = res; break; } + case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _BINARY_OP_ADD_UNICODE: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -480,11 +516,11 @@ } case _BINARY_OP_INPLACE_ADD_UNICODE: { - JitOptSymbol *right; - JitOptSymbol *left; + JitOptRef right; + JitOptRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - JitOptSymbol *res; + JitOptRef res; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -509,7 +545,7 @@ } case _BINARY_OP_EXTEND: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -518,8 +554,8 @@ } case _BINARY_SLICE: { - JitOptSymbol *container; - JitOptSymbol *res; + JitOptRef container; + JitOptRef res; container = stack_pointer[-3]; PyTypeObject *type = sym_get_type(container); if (type == &PyUnicode_Type || @@ -544,7 +580,7 @@ } case _BINARY_OP_SUBSCR_LIST_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -553,7 +589,7 @@ } case _BINARY_OP_SUBSCR_LIST_SLICE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -562,7 +598,7 @@ } case _BINARY_OP_SUBSCR_STR_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyUnicode_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -571,7 +607,7 @@ } case _GUARD_NOS_TUPLE: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyTuple_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -581,7 +617,7 @@ } case _GUARD_TOS_TUPLE: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyTuple_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -591,9 +627,9 @@ } case _BINARY_OP_SUBSCR_TUPLE_INT: { - JitOptSymbol *sub_st; - JitOptSymbol *tuple_st; - JitOptSymbol *res; + JitOptRef sub_st; + JitOptRef tuple_st; + JitOptRef res; sub_st = stack_pointer[-1]; tuple_st = stack_pointer[-2]; assert(sym_matches_type(tuple_st, &PyTuple_Type)); @@ -620,7 +656,7 @@ } case _GUARD_NOS_DICT: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyDict_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -630,7 +666,7 @@ } case _GUARD_TOS_DICT: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyDict_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -640,7 +676,7 @@ } case _BINARY_OP_SUBSCR_DICT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -649,7 +685,7 @@ } case _BINARY_OP_SUBSCR_CHECK_FUNC: { - JitOptSymbol *getitem; + JitOptRef getitem; getitem = sym_new_not_null(ctx); stack_pointer[0] = getitem; stack_pointer += 1; @@ -658,8 +694,8 @@ } case _BINARY_OP_SUBSCR_INIT_CALL: { - JitOptSymbol *new_frame; - new_frame = NULL; + JitOptRef new_frame; + new_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[-3] = new_frame; stack_pointer += -2; @@ -704,14 +740,14 @@ } case _CALL_INTRINSIC_1: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _CALL_INTRINSIC_2: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -720,10 +756,10 @@ } case _RETURN_VALUE: { - JitOptSymbol *retval; - JitOptSymbol *res; + JitOptRef retval; + JitOptRef res; retval = stack_pointer[-1]; - JitOptSymbol *temp = retval; + JitOptRef temp = retval; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -747,14 +783,14 @@ } case _GET_AITER: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_ANEXT: { - JitOptSymbol *awaitable; + JitOptRef awaitable; awaitable = sym_new_not_null(ctx); stack_pointer[0] = awaitable; stack_pointer += 1; @@ -763,7 +799,7 @@ } case _GET_AWAITABLE: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -772,15 +808,15 @@ /* _SEND is not a viable micro-op for tier 2 */ case _SEND_GEN_FRAME: { - JitOptSymbol *gen_frame; - gen_frame = NULL; + JitOptRef gen_frame; + gen_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[-1] = gen_frame; break; } case _YIELD_VALUE: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_unknown(ctx); stack_pointer[-1] = value; break; @@ -793,7 +829,7 @@ } case _LOAD_COMMON_CONSTANT: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -802,7 +838,7 @@ } case _LOAD_BUILD_CLASS: { - JitOptSymbol *bc; + JitOptRef bc; bc = sym_new_not_null(ctx); stack_pointer[0] = bc; stack_pointer += 1; @@ -821,8 +857,8 @@ } case _UNPACK_SEQUENCE: { - JitOptSymbol **values; - JitOptSymbol **top; + JitOptRef *values; + JitOptRef *top; values = &stack_pointer[-1]; top = &stack_pointer[-1 + oparg]; (void)top; @@ -835,9 +871,9 @@ } case _UNPACK_SEQUENCE_TWO_TUPLE: { - JitOptSymbol *seq; - JitOptSymbol *val1; - JitOptSymbol *val0; + JitOptRef seq; + JitOptRef val1; + JitOptRef val0; seq = stack_pointer[-1]; val0 = sym_tuple_getitem(ctx, seq, 0); val1 = sym_tuple_getitem(ctx, seq, 1); @@ -849,8 +885,8 @@ } case _UNPACK_SEQUENCE_TUPLE: { - JitOptSymbol *seq; - JitOptSymbol **values; + JitOptRef seq; + JitOptRef *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; for (int i = 0; i < oparg; i++) { @@ -862,7 +898,7 @@ } case _UNPACK_SEQUENCE_LIST: { - JitOptSymbol **values; + JitOptRef *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -873,8 +909,8 @@ } case _UNPACK_EX: { - JitOptSymbol **values; - JitOptSymbol **top; + JitOptRef *values; + JitOptRef *top; values = &stack_pointer[-1]; top = &stack_pointer[(oparg & 0xFF) + (oparg >> 8)]; (void)top; @@ -910,7 +946,7 @@ } case _LOAD_LOCALS: { - JitOptSymbol *locals; + JitOptRef locals; locals = sym_new_not_null(ctx); stack_pointer[0] = locals; stack_pointer += 1; @@ -921,7 +957,7 @@ /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */ case _LOAD_NAME: { - JitOptSymbol *v; + JitOptRef v; v = sym_new_not_null(ctx); stack_pointer[0] = v; stack_pointer += 1; @@ -930,7 +966,7 @@ } case _LOAD_GLOBAL: { - JitOptSymbol **res; + JitOptRef *res; res = &stack_pointer[0]; res[0] = sym_new_not_null(ctx); stack_pointer += 1; @@ -939,7 +975,7 @@ } case _PUSH_NULL_CONDITIONAL: { - JitOptSymbol **null; + JitOptRef *null; null = &stack_pointer[0]; if (oparg & 1) { REPLACE_OP(this_instr, _PUSH_NULL, 0, 0); @@ -958,7 +994,7 @@ } case _LOAD_GLOBAL_MODULE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -967,7 +1003,7 @@ } case _LOAD_GLOBAL_BUILTINS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -988,14 +1024,14 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_DEREF: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -1014,7 +1050,7 @@ } case _BUILD_STRING: { - JitOptSymbol *str; + JitOptRef str; str = sym_new_type(ctx, &PyUnicode_Type); stack_pointer[-oparg] = str; stack_pointer += 1 - oparg; @@ -1023,7 +1059,7 @@ } case _BUILD_INTERPOLATION: { - JitOptSymbol *interpolation; + JitOptRef interpolation; interpolation = sym_new_not_null(ctx); stack_pointer[-2 - (oparg & 1)] = interpolation; stack_pointer += -1 - (oparg & 1); @@ -1032,7 +1068,7 @@ } case _BUILD_TEMPLATE: { - JitOptSymbol *template; + JitOptRef template; template = sym_new_not_null(ctx); stack_pointer[-2] = template; stack_pointer += -1; @@ -1041,8 +1077,8 @@ } case _BUILD_TUPLE: { - JitOptSymbol **values; - JitOptSymbol *tup; + JitOptRef *values; + JitOptRef tup; values = &stack_pointer[-oparg]; tup = sym_new_tuple(ctx, oparg, values); stack_pointer[-oparg] = tup; @@ -1052,7 +1088,7 @@ } case _BUILD_LIST: { - JitOptSymbol *list; + JitOptRef list; list = sym_new_type(ctx, &PyList_Type); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; @@ -1073,7 +1109,7 @@ } case _BUILD_SET: { - JitOptSymbol *set; + JitOptRef set; set = sym_new_type(ctx, &PySet_Type); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; @@ -1082,7 +1118,7 @@ } case _BUILD_MAP: { - JitOptSymbol *map; + JitOptRef map; map = sym_new_type(ctx, &PyDict_Type); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; @@ -1113,7 +1149,7 @@ } case _LOAD_SUPER_ATTR_ATTR: { - JitOptSymbol *attr_st; + JitOptRef attr_st; attr_st = sym_new_not_null(ctx); stack_pointer[-3] = attr_st; stack_pointer += -2; @@ -1122,8 +1158,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - JitOptSymbol *attr; - JitOptSymbol *self_or_null; + JitOptRef attr; + JitOptRef self_or_null; attr = sym_new_not_null(ctx); self_or_null = sym_new_not_null(ctx); stack_pointer[-3] = attr; @@ -1134,9 +1170,9 @@ } case _LOAD_ATTR: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol **self_or_null; + JitOptRef owner; + JitOptRef attr; + JitOptRef *self_or_null; owner = stack_pointer[-1]; self_or_null = &stack_pointer[0]; (void)owner; @@ -1151,7 +1187,7 @@ } case _GUARD_TYPE_VERSION: { - JitOptSymbol *owner; + JitOptRef owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand0; assert(type_version); @@ -1178,7 +1214,7 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t offset = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)offset; @@ -1187,14 +1223,14 @@ } case _LOAD_ATTR_MODULE: { - JitOptSymbol *owner; - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand0; uint16_t index = (uint16_t)this_instr->operand0; (void)dict_version; (void)index; - attr = NULL; + attr = PyJitRef_NULL; if (sym_is_const(ctx, owner)) { PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner); if (PyModule_CheckExact(mod)) { @@ -1209,7 +1245,7 @@ } } } - if (attr == NULL) { + if (PyJitRef_IsNull(attr)) { attr = sym_new_not_null(ctx); } stack_pointer[-1] = attr; @@ -1217,7 +1253,7 @@ } case _LOAD_ATTR_WITH_HINT: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t hint = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)hint; @@ -1226,7 +1262,7 @@ } case _LOAD_ATTR_SLOT: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t index = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)index; @@ -1235,7 +1271,7 @@ } case _CHECK_ATTR_CLASS: { - JitOptSymbol *owner; + JitOptRef owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand0; PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); @@ -1251,8 +1287,8 @@ } case _LOAD_ATTR_CLASS: { - JitOptSymbol *owner; - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1266,10 +1302,10 @@ } case _LOAD_ATTR_PROPERTY_FRAME: { - JitOptSymbol *new_frame; + JitOptRef new_frame; PyObject *fget = (PyObject *)this_instr->operand0; (void)fget; - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[-1] = new_frame; break; @@ -1300,7 +1336,7 @@ } case _COMPARE_OP: { - JitOptSymbol *res; + JitOptRef res; if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); } @@ -1314,7 +1350,7 @@ } case _COMPARE_OP_FLOAT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -1323,9 +1359,9 @@ } case _COMPARE_OP_INT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -1355,7 +1391,7 @@ } case _COMPARE_OP_STR: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -1364,7 +1400,7 @@ } case _IS_OP: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = b; stack_pointer += -1; @@ -1373,7 +1409,7 @@ } case _CONTAINS_OP: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = b; stack_pointer += -1; @@ -1382,7 +1418,7 @@ } case _GUARD_TOS_ANY_SET: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PySet_Type) || sym_matches_type(tos, &PyFrozenSet_Type)) @@ -1393,7 +1429,7 @@ } case _CONTAINS_OP_SET: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = b; stack_pointer += -1; @@ -1402,7 +1438,7 @@ } case _CONTAINS_OP_DICT: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = b; stack_pointer += -1; @@ -1411,8 +1447,8 @@ } case _CHECK_EG_MATCH: { - JitOptSymbol *rest; - JitOptSymbol *match; + JitOptRef rest; + JitOptRef match; rest = sym_new_not_null(ctx); match = sym_new_not_null(ctx); stack_pointer[-2] = rest; @@ -1421,14 +1457,14 @@ } case _CHECK_EXC_MATCH: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _IMPORT_NAME: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -1437,7 +1473,7 @@ } case _IMPORT_FROM: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1450,15 +1486,15 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _GET_LEN: { - JitOptSymbol *obj; - JitOptSymbol *len; + JitOptRef obj; + JitOptRef len; obj = stack_pointer[-1]; int tuple_length = sym_tuple_length(obj); if (tuple_length == -1) { @@ -1487,7 +1523,7 @@ } case _MATCH_CLASS: { - JitOptSymbol *attrs; + JitOptRef attrs; attrs = sym_new_not_null(ctx); stack_pointer[-3] = attrs; stack_pointer += -2; @@ -1496,7 +1532,7 @@ } case _MATCH_MAPPING: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1505,7 +1541,7 @@ } case _MATCH_SEQUENCE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1514,7 +1550,7 @@ } case _MATCH_KEYS: { - JitOptSymbol *values_or_none; + JitOptRef values_or_none; values_or_none = sym_new_not_null(ctx); stack_pointer[0] = values_or_none; stack_pointer += 1; @@ -1523,9 +1559,9 @@ } case _GET_ITER: { - JitOptSymbol *iterable; - JitOptSymbol *iter; - JitOptSymbol *index_or_null; + JitOptRef iterable; + JitOptRef iter; + JitOptRef index_or_null; iterable = stack_pointer[-1]; if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) { iter = iterable; @@ -1543,7 +1579,7 @@ } case _GET_YIELD_FROM_ITER: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -1552,7 +1588,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1575,7 +1611,7 @@ /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 */ case _ITER_NEXT_LIST_TIER_TWO: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1584,7 +1620,7 @@ } case _ITER_CHECK_TUPLE: { - JitOptSymbol *iter; + JitOptRef iter; iter = stack_pointer[-2]; if (sym_matches_type(iter, &PyTuple_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1600,7 +1636,7 @@ } case _ITER_NEXT_TUPLE: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1619,7 +1655,7 @@ } case _ITER_NEXT_RANGE: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_type(ctx, &PyLong_Type); stack_pointer[0] = next; stack_pointer += 1; @@ -1628,8 +1664,8 @@ } case _FOR_ITER_GEN_FRAME: { - JitOptSymbol *gen_frame; - gen_frame = NULL; + JitOptRef gen_frame; + gen_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[0] = gen_frame; stack_pointer += 1; @@ -1638,8 +1674,8 @@ } case _INSERT_NULL: { - JitOptSymbol *self; - JitOptSymbol **method_and_self; + JitOptRef self; + JitOptRef *method_and_self; self = stack_pointer[-1]; method_and_self = &stack_pointer[-1]; method_and_self[0] = sym_new_null(ctx); @@ -1650,7 +1686,7 @@ } case _LOAD_SPECIAL: { - JitOptSymbol **method_and_self; + JitOptRef *method_and_self; method_and_self = &stack_pointer[-2]; method_and_self[0] = sym_new_not_null(ctx); method_and_self[1] = sym_new_unknown(ctx); @@ -1658,7 +1694,7 @@ } case _WITH_EXCEPT_START: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1667,8 +1703,8 @@ } case _PUSH_EXC_INFO: { - JitOptSymbol *prev_exc; - JitOptSymbol *new_exc; + JitOptRef prev_exc; + JitOptRef new_exc; prev_exc = sym_new_not_null(ctx); new_exc = sym_new_not_null(ctx); stack_pointer[-1] = prev_exc; @@ -1687,9 +1723,9 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1707,9 +1743,9 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1727,8 +1763,8 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - JitOptSymbol *owner; - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1742,8 +1778,8 @@ } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - JitOptSymbol *owner; - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1761,9 +1797,9 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; @@ -1781,9 +1817,9 @@ } case _MAYBE_EXPAND_METHOD: { - JitOptSymbol **args; - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef callable; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1800,7 +1836,7 @@ /* _MONITOR_CALL is not a viable micro-op for tier 2 */ case _PY_FRAME_GENERAL: { - JitOptSymbol *new_frame; + JitOptRef new_frame; PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); co = get_code_with_logging((this_instr + 2)); @@ -1808,7 +1844,7 @@ ctx->done = true; break; } - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -1816,7 +1852,7 @@ } case _CHECK_FUNCTION_VERSION: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)this_instr->operand0; if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) { @@ -1833,7 +1869,7 @@ } case _CHECK_METHOD_VERSION: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)this_instr->operand0; if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) { @@ -1855,7 +1891,7 @@ } case _CALL_NON_PY_GENERAL: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1864,8 +1900,8 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - JitOptSymbol *null; - JitOptSymbol *callable; + JitOptRef null; + JitOptRef callable; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_null(null); @@ -1874,8 +1910,8 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef self_or_null; + JitOptRef callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; callable = sym_new_not_null(ctx); @@ -1893,8 +1929,8 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef self_or_null; + JitOptRef callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; assert(sym_matches_type(callable, &PyFunction_Type)); @@ -1921,9 +1957,9 @@ } case _INIT_CALL_PY_EXACT_ARGS: { - JitOptSymbol **args; - JitOptSymbol *self_or_null; - JitOptSymbol *new_frame; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; int argcount = oparg; @@ -1934,16 +1970,16 @@ ctx->done = true; break; } - assert(self_or_null != NULL); + assert(!PyJitRef_IsNull(self_or_null)); assert(args != NULL); if (sym_is_not_null(self_or_null)) { args--; argcount++; } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount)); } else { - new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; @@ -1952,12 +1988,12 @@ } case _PUSH_FRAME: { - JitOptSymbol *new_frame; + JitOptRef new_frame; new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = (_Py_UOpsAbstractFrame *)new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); @@ -1984,7 +2020,7 @@ } case _GUARD_NOS_NULL: { - JitOptSymbol *null; + JitOptRef null; null = stack_pointer[-2]; if (sym_is_null(null)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1994,7 +2030,7 @@ } case _GUARD_NOS_NOT_NULL: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_is_not_null(nos)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -2004,7 +2040,7 @@ } case _GUARD_THIRD_NULL: { - JitOptSymbol *null; + JitOptRef null; null = stack_pointer[-3]; if (sym_is_null(null)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -2014,7 +2050,7 @@ } case _GUARD_CALLABLE_TYPE_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyType_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -2024,8 +2060,8 @@ } case _CALL_TYPE_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; PyObject* type = (PyObject *)sym_get_type(arg); if (type) { @@ -2043,7 +2079,7 @@ } case _GUARD_CALLABLE_STR_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyUnicode_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -2053,8 +2089,8 @@ } case _CALL_STR_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyUnicode_Type)) { res = arg; @@ -2069,7 +2105,7 @@ } case _GUARD_CALLABLE_TUPLE_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyTuple_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -2079,8 +2115,8 @@ } case _CALL_TUPLE_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyTuple_Type)) { res = arg; @@ -2095,9 +2131,9 @@ } case _CHECK_AND_ALLOCATE_OBJECT: { - JitOptSymbol **args; - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef callable; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -2112,8 +2148,8 @@ } case _CREATE_INIT_FRAME: { - JitOptSymbol *init_frame; - init_frame = NULL; + JitOptRef init_frame; + init_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; @@ -2128,7 +2164,7 @@ } case _CALL_BUILTIN_CLASS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2137,7 +2173,7 @@ } case _CALL_BUILTIN_O: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2146,7 +2182,7 @@ } case _CALL_BUILTIN_FAST: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2155,7 +2191,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2164,7 +2200,7 @@ } case _GUARD_CALLABLE_LEN: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; PyObject *len = _PyInterpreterState_GET()->callable_cache.len; if (sym_get_const(ctx, callable) == len) { @@ -2175,7 +2211,7 @@ } case _CALL_LEN: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyLong_Type); stack_pointer[-3] = res; stack_pointer += -2; @@ -2184,7 +2220,7 @@ } case _GUARD_CALLABLE_ISINSTANCE: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-4]; PyObject *isinstance = _PyInterpreterState_GET()->callable_cache.isinstance; if (sym_get_const(ctx, callable) == isinstance) { @@ -2195,9 +2231,9 @@ } case _CALL_ISINSTANCE: { - JitOptSymbol *cls; - JitOptSymbol *instance; - JitOptSymbol *res; + JitOptRef cls; + JitOptRef instance; + JitOptRef res; cls = stack_pointer[-1]; instance = stack_pointer[-2]; res = sym_new_type(ctx, &PyBool_Type); @@ -2218,7 +2254,7 @@ } case _GUARD_CALLABLE_LIST_APPEND: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append; if (sym_get_const(ctx, callable) == list_append) { @@ -2235,7 +2271,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2244,7 +2280,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2253,7 +2289,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2262,7 +2298,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2279,8 +2315,8 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ case _PY_FRAME_KW: { - JitOptSymbol *new_frame; - new_frame = NULL; + JitOptRef new_frame; + new_frame = PyJitRef_NULL; ctx->done = true; stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; @@ -2305,7 +2341,7 @@ } case _CALL_KW_NON_PY: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-3 - oparg] = res; stack_pointer += -2 - oparg; @@ -2320,14 +2356,14 @@ /* _DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - JitOptSymbol *func; + JitOptRef func; func = sym_new_not_null(ctx); stack_pointer[-1] = func; break; } case _SET_FUNCTION_ATTRIBUTE: { - JitOptSymbol *func_out; + JitOptRef func_out; func_out = sym_new_not_null(ctx); stack_pointer[-2] = func_out; stack_pointer += -1; @@ -2336,7 +2372,7 @@ } case _RETURN_GENERATOR: { - JitOptSymbol *res; + JitOptRef res; ctx->frame->stack_pointer = stack_pointer; frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; @@ -2358,7 +2394,7 @@ } case _BUILD_SLICE: { - JitOptSymbol *slice; + JitOptRef slice; slice = sym_new_type(ctx, &PySlice_Type); stack_pointer[-oparg] = slice; stack_pointer += 1 - oparg; @@ -2367,21 +2403,21 @@ } case _CONVERT_VALUE: { - JitOptSymbol *result; + JitOptRef result; result = sym_new_not_null(ctx); stack_pointer[-1] = result; break; } case _FORMAT_SIMPLE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -2390,8 +2426,8 @@ } case _COPY: { - JitOptSymbol *bottom; - JitOptSymbol *top; + JitOptRef bottom; + JitOptRef top; bottom = stack_pointer[-1 - (oparg-1)]; assert(oparg > 0); top = bottom; @@ -2402,9 +2438,9 @@ } case _BINARY_OP: { - JitOptSymbol *rhs; - JitOptSymbol *lhs; - JitOptSymbol *res; + JitOptRef rhs; + JitOptRef lhs; + JitOptRef res; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); @@ -2447,11 +2483,11 @@ } case _SWAP: { - JitOptSymbol *top; - JitOptSymbol *bottom; + JitOptRef top; + JitOptRef bottom; top = stack_pointer[-1]; bottom = stack_pointer[-2 - (oparg-2)]; - JitOptSymbol *temp = bottom; + JitOptRef temp = bottom; bottom = top; top = temp; assert(oparg >= 2); @@ -2479,7 +2515,7 @@ /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ case _GUARD_IS_TRUE_POP: { - JitOptSymbol *flag; + JitOptRef flag; flag = stack_pointer[-1]; if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); @@ -2493,7 +2529,7 @@ } case _GUARD_IS_FALSE_POP: { - JitOptSymbol *flag; + JitOptRef flag; flag = stack_pointer[-1]; if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); @@ -2507,7 +2543,7 @@ } case _GUARD_IS_NONE_POP: { - JitOptSymbol *val; + JitOptRef val; val = stack_pointer[-1]; if (sym_is_const(ctx, val)) { PyObject *value = sym_get_const(ctx, val); @@ -2525,7 +2561,7 @@ } case _GUARD_IS_NOT_NONE_POP: { - JitOptSymbol *val; + JitOptRef val; val = stack_pointer[-1]; if (sym_is_const(ctx, val)) { PyObject *value = sym_get_const(ctx, val); @@ -2573,9 +2609,9 @@ } case _LOAD_CONST_INLINE: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2583,17 +2619,17 @@ } case _POP_TOP_LOAD_CONST_INLINE: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-1] = value; break; } case _LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2619,15 +2655,15 @@ } case _POP_TOP_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-1] = value; break; } case _POP_TWO_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[-2] = value; stack_pointer += -1; @@ -2636,9 +2672,9 @@ } case _POP_CALL_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-2] = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -2646,9 +2682,9 @@ } case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-3] = value; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -2656,9 +2692,9 @@ } case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-4] = value; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -2666,8 +2702,8 @@ } case _LOAD_CONST_UNDER_INLINE: { - JitOptSymbol *value; - JitOptSymbol *new; + JitOptRef value; + JitOptRef new; value = sym_new_not_null(ctx); new = sym_new_not_null(ctx); stack_pointer[-1] = value; @@ -2678,8 +2714,8 @@ } case _LOAD_CONST_UNDER_INLINE_BORROW: { - JitOptSymbol *value; - JitOptSymbol *new; + JitOptRef value; + JitOptRef new; value = sym_new_not_null(ctx); new = sym_new_not_null(ctx); stack_pointer[-1] = value; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 25de5d83166f64..838f25ad288ab0 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -88,6 +88,12 @@ out_of_space(JitOptContext *ctx) return &NO_SPACE_SYMBOL; } +JitOptRef +out_of_space_ref(JitOptContext *ctx) +{ + return PyJitRef_Wrap(out_of_space(ctx)); +} + static JitOptSymbol * sym_new(JitOptContext *ctx) { @@ -98,7 +104,7 @@ sym_new(JitOptContext *ctx) return NULL; } ctx->t_arena.ty_curr_number++; - self->tag = JIT_SYM_UNKNOWN_TAG; + self->tag = JIT_SYM_UNKNOWN_TAG;; return self; } @@ -117,25 +123,28 @@ sym_set_bottom(JitOptContext *ctx, JitOptSymbol *sym) } bool -_Py_uop_sym_is_bottom(JitOptSymbol *sym) +_Py_uop_sym_is_bottom(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); return sym->tag == JIT_SYM_BOTTOM_TAG; } bool -_Py_uop_sym_is_not_null(JitOptSymbol *sym) { +_Py_uop_sym_is_not_null(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); return sym->tag == JIT_SYM_NON_NULL_TAG || sym->tag > JIT_SYM_BOTTOM_TAG; } bool -_Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { return true; } if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value)); if (truthiness < 0) { return false; } @@ -146,21 +155,22 @@ _Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym) } bool -_Py_uop_sym_is_null(JitOptSymbol *sym) +_Py_uop_sym_is_null(JitOptRef ref) { - return sym->tag == JIT_SYM_NULL_TAG; + return PyJitRef_Unwrap(ref)->tag == JIT_SYM_NULL_TAG; } PyObject * -_Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { return sym->value.value; } if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value)); if (truthiness < 0) { return NULL; } @@ -172,8 +182,9 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym) } void -_Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -222,11 +233,12 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) } bool -_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version) +_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int version) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); PyTypeObject *type = _PyType_LookupByVersion(version); if (type) { - _Py_uop_sym_set_type(ctx, sym, type); + _Py_uop_sym_set_type(ctx, ref, type); } JitSymType tag = sym->tag; switch(tag) { @@ -279,8 +291,9 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int } void -_Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val) +_Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -301,10 +314,10 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val return; case JIT_SYM_TUPLE_TAG: if (PyTuple_CheckExact(const_val)) { - Py_ssize_t len = _Py_uop_sym_tuple_length(sym); + Py_ssize_t len = _Py_uop_sym_tuple_length(ref); if (len == PyTuple_GET_SIZE(const_val)) { for (Py_ssize_t i = 0; i < len; i++) { - JitOptSymbol *sym_item = _Py_uop_sym_tuple_getitem(ctx, sym, i); + JitOptRef sym_item = _Py_uop_sym_tuple_getitem(ctx, ref, i); PyObject *item = PyTuple_GET_ITEM(const_val, i); _Py_uop_sym_set_const(ctx, sym_item, item); } @@ -329,13 +342,14 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val return; case JIT_SYM_TRUTHINESS_TAG: if (!PyBool_Check(const_val) || - (_Py_uop_sym_is_const(ctx, sym) && - _Py_uop_sym_get_const(ctx, sym) != const_val)) + (_Py_uop_sym_is_const(ctx, ref) && + _Py_uop_sym_get_const(ctx, ref) != const_val)) { sym_set_bottom(ctx, sym); return; } - JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; + JitOptRef value = PyJitRef_Wrap( + allocation_base(ctx) + sym->truthiness.value); PyTypeObject *type = _Py_uop_sym_get_type(value); if (const_val == (sym->truthiness.invert ? Py_False : Py_True)) { // value is truthy. This is only useful for bool: @@ -360,8 +374,9 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val } void -_Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_UNKNOWN_TAG) { sym->tag = JIT_SYM_NULL_TAG; } @@ -371,8 +386,9 @@ _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym) } void -_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_UNKNOWN_TAG) { sym->tag = JIT_SYM_NON_NULL_TAG; } @@ -381,66 +397,69 @@ _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym) } } - -JitOptSymbol * +JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } res->tag = JIT_SYM_NON_NULL_TAG; - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_type(JitOptContext *ctx, PyTypeObject *typ) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_type(ctx, res, typ); - return res; + JitOptRef ref = PyJitRef_Wrap(res); + _Py_uop_sym_set_type(ctx, ref, typ); + return ref; } // Adds a new reference to const_val, owned by the symbol. -JitOptSymbol * +JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val) { assert(const_val != NULL); JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_const(ctx, res, const_val); - return res; + JitOptRef ref = PyJitRef_Wrap(res); + _Py_uop_sym_set_const(ctx, ref, const_val); + return ref; } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx) { JitOptSymbol *null_sym = sym_new(ctx); if (null_sym == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_null(ctx, null_sym); - return null_sym; + JitOptRef ref = PyJitRef_Wrap(null_sym); + _Py_uop_sym_set_null(ctx, ref); + return ref; } PyTypeObject * -_Py_uop_sym_get_type(JitOptSymbol *sym) +_Py_uop_sym_get_type(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -463,8 +482,9 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) } unsigned int -_Py_uop_sym_get_type_version(JitOptSymbol *sym) +_Py_uop_sym_get_type_version(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -487,27 +507,28 @@ _Py_uop_sym_get_type_version(JitOptSymbol *sym) } bool -_Py_uop_sym_has_type(JitOptSymbol *sym) +_Py_uop_sym_has_type(JitOptRef sym) { return _Py_uop_sym_get_type(sym) != NULL; } bool -_Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ) { assert(typ != NULL && PyType_Check(typ)); return _Py_uop_sym_get_type(sym) == typ; } bool -_Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version) +_Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version) { return _Py_uop_sym_get_type_version(sym) == version; } int -_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); switch(sym->tag) { case JIT_SYM_NULL_TAG: case JIT_SYM_TYPE_VERSION_TAG: @@ -527,7 +548,8 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) case JIT_SYM_TRUTHINESS_TAG: ; JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, + PyJitRef_Wrap(value)); if (truthiness < 0) { return truthiness; } @@ -553,12 +575,12 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) return -1; } -JitOptSymbol * -_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args) +JitOptRef +_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } if (size > MAX_SYMBOLIC_TUPLE_SIZE) { res->tag = JIT_SYM_KNOWN_CLASS_TAG; @@ -568,15 +590,16 @@ _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args) res->tag = JIT_SYM_TUPLE_TAG; res->tuple.length = size; for (int i = 0; i < size; i++) { - res->tuple.items[i] = (uint16_t)(args[i] - allocation_base(ctx)); + res->tuple.items[i] = (uint16_t)(PyJitRef_Unwrap(args[i]) - allocation_base(ctx)); } } - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * -_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) +JitOptRef +_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef ref, int item) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); assert(item >= 0); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { PyObject *tuple = sym->value.value; @@ -585,14 +608,15 @@ _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) } } else if (sym->tag == JIT_SYM_TUPLE_TAG && item < sym->tuple.length) { - return allocation_base(ctx) + sym->tuple.items[item]; + return PyJitRef_Wrap(allocation_base(ctx) + sym->tuple.items[item]); } return _Py_uop_sym_new_not_null(ctx); } int -_Py_uop_sym_tuple_length(JitOptSymbol *sym) +_Py_uop_sym_tuple_length(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { PyObject *tuple = sym->value.value; if (PyTuple_CheckExact(tuple)) { @@ -607,7 +631,7 @@ _Py_uop_sym_tuple_length(JitOptSymbol *sym) // Return true if known to be immortal. bool -_Py_uop_sym_is_immortal(JitOptSymbol *sym) +_Py_uop_symbol_is_immortal(JitOptSymbol *sym) { if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { return _Py_IsImmortal(sym->value.value); @@ -621,19 +645,27 @@ _Py_uop_sym_is_immortal(JitOptSymbol *sym) return false; } -JitOptSymbol * -_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy) +bool +_Py_uop_sym_is_immortal(JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + return _Py_uop_symbol_is_immortal(sym); +} + +JitOptRef +_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy) { + JitOptSymbol *value = PyJitRef_Unwrap(ref); // It's clearer to invert this in the signature: bool invert = !truthy; if (value->tag == JIT_SYM_TRUTHINESS_TAG && value->truthiness.invert == invert) { - return value; + return ref; } JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, ref); if (truthiness < 0) { res->tag = JIT_SYM_TRUTHINESS_TAG; res->truthiness.invert = invert; @@ -642,7 +674,7 @@ _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy) else { make_const(res, (truthiness ^ invert) ? Py_True : Py_False); } - return res; + return PyJitRef_Wrap(res); } // 0 on success, -1 on error. @@ -651,7 +683,7 @@ _Py_uop_frame_new( JitOptContext *ctx, PyCodeObject *co, int curr_stackentries, - JitOptSymbol **args, + JitOptRef *args, int arg_len) { assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); @@ -676,14 +708,14 @@ _Py_uop_frame_new( } for (int i = arg_len; i < co->co_nlocalsplus; i++) { - JitOptSymbol *local = _Py_uop_sym_new_unknown(ctx); + JitOptRef local = _Py_uop_sym_new_unknown(ctx); frame->locals[i] = local; } // Initialize the stack as well for (int i = 0; i < curr_stackentries; i++) { - JitOptSymbol *stackvar = _Py_uop_sym_new_unknown(ctx); + JitOptRef stackvar = _Py_uop_sym_new_unknown(ctx); frame->stack[i] = stackvar; } @@ -709,12 +741,12 @@ _Py_uop_abstractcontext_fini(JitOptContext *ctx) void _Py_uop_abstractcontext_init(JitOptContext *ctx) { - static_assert(sizeof(JitOptSymbol) <= 2 * sizeof(uint64_t), "JitOptSymbol has grown"); + static_assert(sizeof(JitOptSymbol) <= 3 * sizeof(uint64_t), "JitOptSymbol has grown"); ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; ctx->n_consumed = ctx->locals_and_stack; #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - ctx->locals_and_stack[i] = NULL; + ctx->locals_and_stack[i] = PyJitRef_NULL; } #endif @@ -767,44 +799,44 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) PyObject *tuple = NULL; // Use a single 'sym' variable so copy-pasting tests is easier. - JitOptSymbol *sym = _Py_uop_sym_new_unknown(ctx); - if (sym == NULL) { + JitOptRef ref = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "top is NULL"); - TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "top is not NULL"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "top matches a type"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "top is a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "top as constant is not NULL"); - TEST_PREDICATE(!_Py_uop_sym_is_bottom(sym), "top is bottom"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "top is NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "top is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "top matches a type"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "top is a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "top as constant is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_bottom(ref), "top is bottom"); - sym = make_bottom(ctx); - if (sym == NULL) { + ref = PyJitRef_Wrap(make_bottom(ctx)); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "bottom is NULL is not false"); - TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "bottom is not NULL is not false"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "bottom matches a type"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "bottom is a constant is not false"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "bottom as constant is not NULL"); - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "bottom isn't bottom"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "bottom is NULL is not false"); + TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "bottom is not NULL is not false"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "bottom matches a type"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "bottom is a constant is not false"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "bottom as constant is not NULL"); + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "bottom isn't bottom"); - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "int is NULL"); - TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "int isn't not NULL"); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "int isn't int"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "int matches float"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "int is a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "int as constant is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "int is NULL"); + TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "int isn't not NULL"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "int isn't int"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "int matches float"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "int is a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "int as constant is not NULL"); - _Py_uop_sym_set_type(ctx, sym, &PyLong_Type); // Should be a no-op - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(int and int) isn't int"); + _Py_uop_sym_set_type(ctx, ref, &PyLong_Type); // Should be a no-op + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(int and int) isn't int"); - _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(int and float) isn't bottom"); + _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(int and float) isn't bottom"); val_42 = PyLong_FromLong(42); assert(val_42 != NULL); @@ -814,84 +846,84 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) assert(val_43 != NULL); assert(_Py_IsImmortal(val_43)); - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - _Py_uop_sym_set_const(ctx, sym, val_42); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 1, "bool(42) is not True"); - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "42 is NULL"); - TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "42 isn't not NULL"); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "42 isn't an int"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "42 matches float"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym), "42 is not a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) != NULL, "42 as constant is NULL"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "42 as constant isn't 42"); - TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "42 is not immortal"); - - _Py_uop_sym_set_type(ctx, sym, &PyLong_Type); // Should be a no-op - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(42 and 42) isn't an int"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "(42 and 42) as constant isn't 42"); - - _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and float) isn't bottom"); - - sym = _Py_uop_sym_new_type(ctx, &PyBool_Type); - TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "a bool is not immortal"); - - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + _Py_uop_sym_set_const(ctx, ref, val_42); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 1, "bool(42) is not True"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "42 is NULL"); + TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "42 isn't not NULL"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "42 isn't an int"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "42 matches float"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref), "42 is not a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) != NULL, "42 as constant is NULL"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "42 as constant isn't 42"); + TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "42 is not immortal"); + + _Py_uop_sym_set_type(ctx, ref, &PyLong_Type); // Should be a no-op + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(42 and 42) isn't an int"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "(42 and 42) as constant isn't 42"); + + _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and float) isn't bottom"); + + ref = _Py_uop_sym_new_type(ctx, &PyBool_Type); + TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "a bool is not immortal"); + + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - _Py_uop_sym_set_const(ctx, sym, val_42); - _Py_uop_sym_set_const(ctx, sym, val_43); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 43) isn't bottom"); + _Py_uop_sym_set_const(ctx, ref, val_42); + _Py_uop_sym_set_const(ctx, ref, val_43); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and 43) isn't bottom"); - sym = _Py_uop_sym_new_const(ctx, Py_None); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(None) is not False"); - sym = _Py_uop_sym_new_const(ctx, Py_False); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(False) is not False"); - sym = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0)); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(0) is not False"); + ref = _Py_uop_sym_new_const(ctx, Py_None); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(None) is not False"); + ref = _Py_uop_sym_new_const(ctx, Py_False); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(False) is not False"); + ref = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0)); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(0) is not False"); - JitOptSymbol *i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type); - JitOptSymbol *i2 = _Py_uop_sym_new_const(ctx, val_43); - JitOptSymbol *array[2] = { i1, i2 }; - sym = _Py_uop_sym_new_tuple(ctx, 2, array); + JitOptRef i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type); + JitOptRef i2 = _Py_uop_sym_new_const(ctx, val_43); + JitOptRef array[2] = { i1, i2 }; + ref = _Py_uop_sym_new_tuple(ctx, 2, array); TEST_PREDICATE( - _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, sym, 0), &PyFloat_Type), + _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, ref, 0), &PyFloat_Type), "tuple item does not match value used to create tuple" ); TEST_PREDICATE( - _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, + _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43, "tuple item does not match value used to create tuple" ); PyObject *pair[2] = { val_42, val_43 }; tuple = _PyTuple_FromArray(pair, 2); - sym = _Py_uop_sym_new_const(ctx, tuple); + ref = _Py_uop_sym_new_const(ctx, tuple); TEST_PREDICATE( - _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, + _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43, "tuple item does not match value used to create tuple" ); - sym = _Py_uop_sym_new_type(ctx, &PyTuple_Type); + ref = _Py_uop_sym_new_type(ctx, &PyTuple_Type); TEST_PREDICATE( - _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, sym, 42)), + _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, ref, 42)), "Unknown tuple item is not narrowed to non-NULL" ); - JitOptSymbol *value = _Py_uop_sym_new_type(ctx, &PyBool_Type); - sym = _Py_uop_sym_new_truthiness(ctx, value, false); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean"); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == -1, "truthiness is not unknown"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == false, "truthiness is constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "truthiness is not NULL"); + JitOptRef value = _Py_uop_sym_new_type(ctx, &PyBool_Type); + ref = _Py_uop_sym_new_truthiness(ctx, value, false); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean"); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == -1, "truthiness is not unknown"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == false, "truthiness is constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "truthiness is not NULL"); TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == false, "value is constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == NULL, "value is not NULL"); - _Py_uop_sym_set_const(ctx, sym, Py_False); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean"); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "truthiness is not True"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == true, "truthiness is not constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == Py_False, "truthiness is not False"); + _Py_uop_sym_set_const(ctx, ref, Py_False); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean"); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "truthiness is not True"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == true, "truthiness is not constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == Py_False, "truthiness is not False"); TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == Py_True, "value is not True"); _Py_uop_abstractcontext_fini(ctx); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index c6a9fbcad8891f..2723e7ed7242ea 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -741,7 +741,7 @@ def visit(stmt: Stmt) -> None: continue #if not tkn.text.startswith(("Py", "_Py", "monitor")): # continue - if tkn.text.startswith(("sym_", "optimize_")): + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): # Optimize functions continue if tkn.text.endswith("Check"): diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 75805dbd7f37f4..3b4fe64b02a807 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -72,8 +72,8 @@ def validate_uop(override: Uop, uop: Uop) -> None: def type_name(var: StackItem) -> str: if var.is_array(): - return "JitOptSymbol **" - return "JitOptSymbol *" + return "JitOptRef *" + return "JitOptRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: From 269e19e0a7efc62ee4ffbd8424974dc6d4fbfd5d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 17 Jun 2025 13:16:59 -0600 Subject: [PATCH 590/989] gh-132775: Fix Interpreter.call() __main__ Visibility (gh-135595) As noted in the new tests, there are a few situations we must carefully accommodate for functions that get pickled during interp.call(). We do so by running the script from the main interpreter's __main__ module in a hidden module in the other interpreter. That hidden module is used as the function __globals__. --- Lib/test/test_interpreters/test_api.py | 181 ++++++++++ Modules/_interpretersmodule.c | 1 + Python/crossinterp.c | 444 ++++++++++++------------- Python/pystate.c | 1 + 4 files changed, 400 insertions(+), 227 deletions(-) diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index a409c1a691818e..612e8240603f03 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1356,6 +1356,187 @@ def {funcname}(): with self.assertRaises(interpreters.NotShareableError): interp.call(defs.spam_returns_arg, arg) + def test_func_in___main___hidden(self): + # When a top-level function that uses global variables is called + # through Interpreter.call(), it will be pickled, sent over, + # and unpickled. That requires that it be found in the other + # interpreter's __main__ module. However, the original script + # that defined the function is only run in the main interpreter, + # so pickle.loads() would normally fail. + # + # We work around this by running the script in the other + # interpreter. However, this is a one-off solution for the sake + # of unpickling, so we avoid modifying that interpreter's + # __main__ module by running the script in a hidden module. + # + # In this test we verify that the function runs with the hidden + # module as its __globals__ when called in the other interpreter, + # and that the interpreter's __main__ module is unaffected. + text = dedent(""" + eggs = True + + def spam(*, explicit=False): + if explicit: + import __main__ + ns = __main__.__dict__ + else: + # For now we have to have a LOAD_GLOBAL in the + # function in order for globals() to actually return + # spam.__globals__. Maybe it doesn't go through pickle? + # XXX We will fix this later. + spam + ns = globals() + + func = ns.get('spam') + return [ + id(ns), + ns.get('__name__'), + ns.get('__file__'), + id(func), + None if func is None else repr(func), + ns.get('eggs'), + ns.get('ham'), + ] + + if __name__ == "__main__": + from concurrent import interpreters + interp = interpreters.create() + + ham = True + print([ + [ + spam(explicit=True), + spam(), + ], + [ + interp.call(spam, explicit=True), + interp.call(spam), + ], + ]) + """) + with os_helper.temp_dir() as tempdir: + filename = script_helper.make_script(tempdir, 'my-script', text) + res = script_helper.assert_python_ok(filename) + stdout = res.out.decode('utf-8').strip() + local, remote = eval(stdout) + + # In the main interpreter. + main, unpickled = local + nsid, _, _, funcid, func, _, _ = main + self.assertEqual(main, [ + nsid, + '__main__', + filename, + funcid, + func, + True, + True, + ]) + self.assertIsNot(func, None) + self.assertRegex(func, '^$') + self.assertEqual(unpickled, main) + + # In the subinterpreter. + main, unpickled = remote + nsid1, _, _, funcid1, _, _, _ = main + self.assertEqual(main, [ + nsid1, + '__main__', + None, + funcid1, + None, + None, + None, + ]) + nsid2, _, _, funcid2, func, _, _ = unpickled + self.assertEqual(unpickled, [ + nsid2, + '', + filename, + funcid2, + func, + True, + None, + ]) + self.assertIsNot(func, None) + self.assertRegex(func, '^$') + self.assertNotEqual(nsid2, nsid1) + self.assertNotEqual(funcid2, funcid1) + + def test_func_in___main___uses_globals(self): + # See the note in test_func_in___main___hidden about pickle + # and the __main__ module. + # + # Additionally, the solution to that problem must provide + # for global variables on which a pickled function might rely. + # + # To check that, we run a script that has two global functions + # and a global variable in the __main__ module. One of the + # functions sets the global variable and the other returns + # the value. + # + # The script calls those functions multiple times in another + # interpreter, to verify the following: + # + # * the global variable is properly initialized + # * the global variable retains state between calls + # * the setter modifies that persistent variable + # * the getter uses the variable + # * the calls in the other interpreter do not modify + # the main interpreter + # * those calls don't modify the interpreter's __main__ module + # * the functions and variable do not actually show up in the + # other interpreter's __main__ module + text = dedent(""" + count = 0 + + def inc(x=1): + global count + count += x + + def get_count(): + return count + + if __name__ == "__main__": + counts = [] + results = [count, counts] + + from concurrent import interpreters + interp = interpreters.create() + + val = interp.call(get_count) + counts.append(val) + + interp.call(inc) + val = interp.call(get_count) + counts.append(val) + + interp.call(inc, 3) + val = interp.call(get_count) + counts.append(val) + + results.append(count) + + modified = {name: interp.call(eval, f'{name!r} in vars()') + for name in ('count', 'inc', 'get_count')} + results.append(modified) + + print(results) + """) + with os_helper.temp_dir() as tempdir: + filename = script_helper.make_script(tempdir, 'my-script', text) + res = script_helper.assert_python_ok(filename) + stdout = res.out.decode('utf-8').strip() + before, counts, after, modified = eval(stdout) + self.assertEqual(modified, { + 'count': False, + 'inc': False, + 'get_count': False, + }) + self.assertEqual(before, 0) + self.assertEqual(after, 0) + self.assertEqual(counts, [0, 1, 4]) + def test_raises(self): interp = interpreters.create() with self.assertRaises(ExecutionFailed): diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index fdfb3e6dd3482d..e201ccd53daf8a 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -601,6 +601,7 @@ _make_call(struct interp_call *call, unwrap_not_shareable(tstate, failure); return -1; } + assert(!_PyErr_Occurred(tstate)); // Make the call. PyObject *resobj = PyObject_Call(func, args, kwargs); diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 0bd267e07d5f2b..aa958fe2f5cbd7 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -7,9 +7,12 @@ #include "pycore_ceval.h" // _Py_simple_func #include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_function.h" // _PyFunction_VerifyStateless() +#include "pycore_global_strings.h" // _Py_ID() +#include "pycore_import.h" // _PyImport_SetModule() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" // _PyNamespace_New() #include "pycore_pythonrun.h" // _Py_SourceAsString() +#include "pycore_runtime.h" // _PyRuntime #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_typeobject.h" // _PyStaticType_InitBuiltin() @@ -22,6 +25,7 @@ _Py_GetMainfile(char *buffer, size_t maxlen) PyThreadState *tstate = _PyThreadState_GET(); PyObject *module = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(module) < 0) { + Py_XDECREF(module); return -1; } Py_ssize_t size = _PyModule_GetFilenameUTF8(module, buffer, maxlen); @@ -30,27 +34,6 @@ _Py_GetMainfile(char *buffer, size_t maxlen) } -static PyObject * -import_get_module(PyThreadState *tstate, const char *modname) -{ - PyObject *module = NULL; - if (strcmp(modname, "__main__") == 0) { - module = _Py_GetMainModule(tstate); - if (_Py_CheckMainModule(module) < 0) { - assert(_PyErr_Occurred(tstate)); - return NULL; - } - } - else { - module = PyImport_ImportModule(modname); - if (module == NULL) { - return NULL; - } - } - return module; -} - - static PyObject * runpy_run_path(const char *filename, const char *modname) { @@ -81,97 +64,181 @@ set_exc_with_cause(PyObject *exctype, const char *msg) } -static PyObject * -pyerr_get_message(PyObject *exc) +/****************************/ +/* module duplication utils */ +/****************************/ + +struct sync_module_result { + PyObject *module; + PyObject *loaded; + PyObject *failed; +}; + +struct sync_module { + const char *filename; + char _filename[MAXPATHLEN+1]; + struct sync_module_result cached; +}; + +static void +sync_module_clear(struct sync_module *data) { - assert(!PyErr_Occurred()); - PyObject *args = PyException_GetArgs(exc); - if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { - return NULL; - } - if (PyUnicode_Check(args)) { - return args; - } - PyObject *msg = PySequence_GetItem(args, 0); - Py_DECREF(args); - if (msg == NULL) { - PyErr_Clear(); - return NULL; - } - if (!PyUnicode_Check(msg)) { - Py_DECREF(msg); - return NULL; - } - return msg; + data->filename = NULL; + Py_CLEAR(data->cached.module); + Py_CLEAR(data->cached.loaded); + Py_CLEAR(data->cached.failed); } -#define MAX_MODNAME (255) -#define MAX_ATTRNAME (255) +static void +sync_module_capture_exc(PyThreadState *tstate, struct sync_module *data) +{ + assert(_PyErr_Occurred(tstate)); + PyObject *context = data->cached.failed; + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetRaisedException(tstate, Py_NewRef(exc)); + if (context != NULL) { + PyException_SetContext(exc, context); + } + data->cached.failed = exc; +} -struct attributeerror_info { - char modname[MAX_MODNAME+1]; - char attrname[MAX_ATTRNAME+1]; -}; static int -_parse_attributeerror(PyObject *exc, struct attributeerror_info *info) +ensure_isolated_main(PyThreadState *tstate, struct sync_module *main) { - assert(exc != NULL); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - int res = -1; + // Load the module from the original file (or from a cache). - PyObject *msgobj = pyerr_get_message(exc); - if (msgobj == NULL) { + // First try the local cache. + if (main->cached.failed != NULL) { + // We'll deal with this in apply_isolated_main(). + assert(main->cached.module == NULL); + assert(main->cached.loaded == NULL); + return 0; + } + else if (main->cached.loaded != NULL) { + assert(main->cached.module != NULL); + return 0; + } + assert(main->cached.module == NULL); + + if (main->filename == NULL) { + _PyErr_SetString(tstate, PyExc_NotImplementedError, ""); return -1; } - const char *err = PyUnicode_AsUTF8(msgobj); - if (strncmp(err, "module '", 8) != 0) { - goto finally; + // It wasn't in the local cache so we'll need to populate it. + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + // This is probably unrecoverable, so don't bother caching the error. + assert(_PyErr_Occurred(tstate)); + Py_XDECREF(mod); + return -1; } - err += 8; + PyObject *loaded = NULL; - const char *matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; + // Try the per-interpreter cache for the loaded module. + // XXX Store it in sys.modules? + PyObject *interpns = PyInterpreterState_GetDict(tstate->interp); + assert(interpns != NULL); + PyObject *key = PyUnicode_FromString("CACHED_MODULE_NS___main__"); + if (key == NULL) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + return -1; } - Py_ssize_t len = matched - err; - if (len > MAX_MODNAME) { - goto finally; + else if (PyDict_GetItemRef(interpns, key, &loaded) < 0) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + Py_DECREF(key); + return -1; } - (void)strncpy(info->modname, err, len); - info->modname[len] = '\0'; - err = matched; + else if (loaded == NULL) { + // It wasn't already loaded from file. + loaded = PyModule_NewObject(&_Py_ID(__main__)); + if (loaded == NULL) { + goto error; + } + PyObject *ns = _PyModule_GetDict(loaded); - if (strncmp(err, "' has no attribute '", 20) != 0) { - goto finally; - } - err += 20; + // We don't want to trigger "if __name__ == '__main__':", + // so we use a bogus module name. + PyObject *loaded_ns = + runpy_run_path(main->filename, ""); + if (loaded_ns == NULL) { + goto error; + } + int res = PyDict_Update(ns, loaded_ns); + Py_DECREF(loaded_ns); + if (res < 0) { + goto error; + } - matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; - } - len = matched - err; - if (len > MAX_ATTRNAME) { - goto finally; + // Set the per-interpreter cache entry. + if (PyDict_SetItem(interpns, key, loaded) < 0) { + goto error; + } } - (void)strncpy(info->attrname, err, len); - info->attrname[len] = '\0'; - err = matched + 1; - if (strlen(err) > 0) { - goto finally; + Py_DECREF(key); + main->cached = (struct sync_module_result){ + .module = mod, + .loaded = loaded, + }; + return 0; + +error: + sync_module_capture_exc(tstate, main); + Py_XDECREF(loaded); + Py_DECREF(mod); + Py_XDECREF(key); + return -1; +} + +#ifndef NDEBUG +static int +main_mod_matches(PyObject *expected) +{ + PyObject *mod = PyImport_GetModule(&_Py_ID(__main__)); + Py_XDECREF(mod); + return mod == expected; +} +#endif + +static int +apply_isolated_main(PyThreadState *tstate, struct sync_module *main) +{ + assert((main->cached.loaded == NULL) == (main->cached.loaded == NULL)); + if (main->cached.failed != NULL) { + // It must have failed previously. + assert(main->cached.loaded == NULL); + _PyErr_SetRaisedException(tstate, main->cached.failed); + return -1; } - res = 0; + assert(main->cached.loaded != NULL); -finally: - Py_DECREF(msgobj); - return res; + assert(main_mod_matches(main->cached.module)); + if (_PyImport_SetModule(&_Py_ID(__main__), main->cached.loaded) < 0) { + sync_module_capture_exc(tstate, main); + return -1; + } + return 0; } -#undef MAX_MODNAME -#undef MAX_ATTRNAME +static void +restore_main(PyThreadState *tstate, struct sync_module *main) +{ + assert(main->cached.failed == NULL); + assert(main->cached.module != NULL); + assert(main->cached.loaded != NULL); + PyObject *exc = _PyErr_GetRaisedException(tstate); + assert(main_mod_matches(main->cached.loaded)); + int res = _PyImport_SetModule(&_Py_ID(__main__), main->cached.module); + assert(res == 0); + if (res < 0) { + PyErr_FormatUnraisable("Exception ignored while restoring __main__"); + } + _PyErr_SetRaisedException(tstate, exc); +} /**************/ @@ -518,28 +585,6 @@ _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj) } -struct sync_module_result { - PyObject *module; - PyObject *loaded; - PyObject *failed; -}; - -struct sync_module { - const char *filename; - char _filename[MAXPATHLEN+1]; - struct sync_module_result cached; -}; - -static void -sync_module_clear(struct sync_module *data) -{ - data->filename = NULL; - Py_CLEAR(data->cached.module); - Py_CLEAR(data->cached.loaded); - Py_CLEAR(data->cached.failed); -} - - struct _unpickle_context { PyThreadState *tstate; // We only special-case the __main__ module, @@ -553,142 +598,86 @@ _unpickle_context_clear(struct _unpickle_context *ctx) sync_module_clear(&ctx->main); } -static struct sync_module_result -_unpickle_context_get_module(struct _unpickle_context *ctx, - const char *modname) +static int +check_missing___main___attr(PyObject *exc) { - if (strcmp(modname, "__main__") == 0) { - return ctx->main.cached; + assert(!PyErr_Occurred()); + if (!PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)) { + return 0; } - else { - return (struct sync_module_result){ - .failed = PyExc_NotImplementedError, - }; + + // Get the error message. + PyObject *args = PyException_GetArgs(exc); + if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { + assert(!PyErr_Occurred()); + return 0; } + PyObject *msgobj = args; + if (!PyUnicode_Check(msgobj)) { + msgobj = PySequence_GetItem(args, 0); + Py_DECREF(args); + if (msgobj == NULL) { + PyErr_Clear(); + return 0; + } + } + const char *err = PyUnicode_AsUTF8(msgobj); + + // Check if it's a missing __main__ attr. + int cmp = strncmp(err, "module '__main__' has no attribute '", 36); + Py_DECREF(msgobj); + return cmp == 0; } -static struct sync_module_result -_unpickle_context_set_module(struct _unpickle_context *ctx, - const char *modname) +static PyObject * +_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) { - struct sync_module_result res = {0}; - struct sync_module_result *cached = NULL; - const char *filename = NULL; - const char *run_modname = modname; - if (strcmp(modname, "__main__") == 0) { - cached = &ctx->main.cached; - filename = ctx->main.filename; - // We don't want to trigger "if __name__ == '__main__':". - run_modname = ""; - } - else { - res.failed = PyExc_NotImplementedError; - goto finally; - } + PyThreadState *tstate = ctx->tstate; - res.module = import_get_module(ctx->tstate, modname); - if (res.module == NULL) { - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); - goto finally; + PyObject *exc = NULL; + PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); + if (loads == NULL) { + return NULL; } - if (filename == NULL) { - Py_CLEAR(res.module); - res.failed = PyExc_NotImplementedError; + // Make an initial attempt to unpickle. + PyObject *obj = PyObject_CallOneArg(loads, pickled); + if (obj != NULL) { goto finally; } - res.loaded = runpy_run_path(filename, run_modname); - if (res.loaded == NULL) { - Py_CLEAR(res.module); - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); + assert(_PyErr_Occurred(tstate)); + if (ctx == NULL) { goto finally; } - -finally: - if (cached != NULL) { - assert(cached->module == NULL); - assert(cached->loaded == NULL); - assert(cached->failed == NULL); - *cached = res; - } - return res; -} - - -static int -_handle_unpickle_missing_attr(struct _unpickle_context *ctx, PyObject *exc) -{ - // The caller must check if an exception is set or not when -1 is returned. - assert(!_PyErr_Occurred(ctx->tstate)); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - struct attributeerror_info info; - if (_parse_attributeerror(exc, &info) < 0) { - return -1; + exc = _PyErr_GetRaisedException(tstate); + if (!check_missing___main___attr(exc)) { + goto finally; } - // Get the module. - struct sync_module_result mod = _unpickle_context_get_module(ctx, info.modname); - if (mod.failed != NULL) { - // It must have failed previously. - return -1; - } - if (mod.module == NULL) { - mod = _unpickle_context_set_module(ctx, info.modname); - if (mod.failed != NULL) { - return -1; - } - assert(mod.module != NULL); + // Temporarily swap in a fake __main__ loaded from the original + // file and cached. Note that functions will use the cached ns + // for __globals__, // not the actual module. + if (ensure_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - - // Bail out if it is unexpectedly set already. - if (PyObject_HasAttrString(mod.module, info.attrname)) { - return -1; + if (apply_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - // Try setting the attribute. - PyObject *value = NULL; - if (PyDict_GetItemStringRef(mod.loaded, info.attrname, &value) <= 0) { - return -1; - } - assert(value != NULL); - int res = PyObject_SetAttrString(mod.module, info.attrname, value); - Py_DECREF(value); - if (res < 0) { - return -1; + // Try to unpickle once more. + obj = PyObject_CallOneArg(loads, pickled); + restore_main(tstate, &ctx->main); + if (obj == NULL) { + goto finally; } + Py_CLEAR(exc); - return 0; -} - -static PyObject * -_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) -{ - PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); - if (loads == NULL) { - return NULL; - } - PyObject *obj = PyObject_CallOneArg(loads, pickled); - if (ctx != NULL) { - while (obj == NULL) { - assert(_PyErr_Occurred(ctx->tstate)); - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { - // We leave other failures unhandled. - break; - } - // Try setting the attr if not set. - PyObject *exc = _PyErr_GetRaisedException(ctx->tstate); - if (_handle_unpickle_missing_attr(ctx, exc) < 0) { - // Any resulting exceptions are ignored - // in favor of the original. - _PyErr_SetRaisedException(ctx->tstate, exc); - break; - } - Py_CLEAR(exc); - // Retry with the attribute set. - obj = PyObject_CallOneArg(loads, pickled); - } +finally: + if (exc != NULL) { + sync_module_capture_exc(tstate, &ctx->main); + // We restore the original exception. + // It might make sense to chain it (__context__). + _PyErr_SetRaisedException(tstate, exc); } Py_DECREF(loads); return obj; @@ -2889,6 +2878,7 @@ _ensure_main_ns(_PyXI_session *session, _PyXI_failure *failure) // Cache __main__.__dict__. PyObject *main_mod = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(main_mod) < 0) { + Py_XDECREF(main_mod); if (failure != NULL) { *failure = (_PyXI_failure){ .code = _PyXI_ERR_MAIN_NS_FAILURE, diff --git a/Python/pystate.c b/Python/pystate.c index 0544b15aad1cc8..0d4c26f92cec90 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1142,6 +1142,7 @@ _Py_CheckMainModule(PyObject *module) PyObject *msg = PyUnicode_FromString("invalid __main__ module"); if (msg != NULL) { (void)PyErr_SetImportError(msg, &_Py_ID(__main__), NULL); + Py_DECREF(msg); } return -1; } From cb394101110e13a27e08bbf2fe9f38d847db004c Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 18 Jun 2025 00:51:41 +0530 Subject: [PATCH 591/989] Initial internal asyncio docs (#135469) Currently focused on `_asynciomodule.c` but could also receive updates about internals of the Python package. --- InternalDocs/README.md | 6 ++ InternalDocs/asyncio.md | 210 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 InternalDocs/asyncio.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 4502902307cd5c..c20aa015c5bb74 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -41,3 +41,9 @@ Program Execution - [Garbage Collector Design](garbage_collector.md) - [Exception Handling](exception_handling.md) + + +Modules +--- + +- [asyncio](asyncio.md) \ No newline at end of file diff --git a/InternalDocs/asyncio.md b/InternalDocs/asyncio.md new file mode 100644 index 00000000000000..b60fe70478a6bc --- /dev/null +++ b/InternalDocs/asyncio.md @@ -0,0 +1,210 @@ +asyncio +======= + + +This document describes the working and implementation details of C +implementation of the +[`asyncio`](https://docs.python.org/3/library/asyncio.html) module. + + +## Pre-Python 3.14 implementation + +Before Python 3.14, the C implementation of `asyncio` used a +[`WeakSet`](https://docs.python.org/3/library/weakref.html#weakref.WeakSet) +to store all the tasks created by the event loop. `WeakSet` was used +so that the event loop doesn't hold strong references to the tasks, +allowing them to be garbage collected when they are no longer needed. +The current task of the event loop was stored in a dict mapping the +event loop to the current task. + +```c + /* Dictionary containing tasks that are currently active in + all running event loops. {EventLoop: Task} */ + PyObject *current_tasks; + + /* WeakSet containing all tasks scheduled to run on event loops. */ + PyObject *scheduled_tasks; +``` + +This implementation had a few drawbacks: + +1. **Performance**: Using a `WeakSet` for storing tasks is +inefficient, as it requires maintaining a full set of weak references +to tasks along with corresponding weakref callback to cleanup the +tasks when they are garbage collected. This increases the work done +by the garbage collector, and in applications with a large number of +tasks, this becomes a bottleneck, with increased memory usage and +lower performance. Looking up the current task was slow as it required +a dictionary lookup on the `current_tasks` dict. + +2. **Thread safety**: Before Python 3.14, concurrent iterations over +`WeakSet` was not thread safe[^1]. This meant calling APIs like +`asyncio.all_tasks()` could lead to inconsistent results or even +`RuntimeError` if used in multiple threads[^2]. + +3. **Poor scaling in free-threading**: Using global `WeakSet` for +storing all tasks across all threads lead to contention when adding +and removing tasks from the set which is a frequent operation. As such +it performed poorly in free-threading and did not scale well with the +number of threads. Similarly, accessing the current task in multiple +threads did not scale due to contention on the global `current_tasks` +dictionary. + +## Python 3.14 implementation + +To address these issues, Python 3.14 implements several changes to +improve the performance and thread safety of tasks management. + +- **Per-thread double linked list for tasks**: Python 3.14 introduces + a per-thread circular double linked list implementation for + storing tasks. This allows each thread to maintain its own list of + tasks and allows for lock free addition and removal of tasks. This + is designed to be efficient, and thread-safe and scales well with + the number of threads in free-threading. This also allows external + introspection tools such as `python -m asyncio pstree` to inspect + tasks running in all threads and was implemented as part of [Audit + asyncio thread + safety](https://github.com/python/cpython/issues/128002). + +- **Per-thread current task**: Python 3.14 stores the current task on + the current thread state instead of a global dictionary. This + allows for faster access to the current task without the need for + a dictionary lookup. Each thread maintains its own current task, + which is stored in the `PyThreadState` structure. This was + implemented in https://github.com/python/cpython/issues/129898. + +Storing the current task and list of all tasks per-thread instead of +storing it per-loop was chosen primarily to support external +introspection tools such as `python -m asyncio pstree` as looking up +arbitrary attributes on the loop object is not possible +externally. Storing data per-thread also makes it easy to support +third party event loop implementations such as `uvloop`, and is more +efficient for the single threaded asyncio use-case as it avoids the +overhead of attribute lookups on the loop object and several other +calls on the performance critical path of adding and removing tasks +from the per-loop task list. + +## Per-thread double linked list for tasks + +This implementation uses a circular doubly linked list to store tasks +on the thread states. This is used for all tasks which are instances +of `asyncio.Task` or subclasses of it, for third-party tasks a +fallback `WeakSet` implementation is used. The linked list is +implemented using an embedded `llist_node` structure within each +`TaskObj`. By embedding the list node directly into the task object, +the implementation avoids additional memory allocations for linked +list nodes. + +The `PyThreadState` structure gained a new field `asyncio_tasks_head`, +which serves as the head of the circular linked list of tasks. This +allows for lock free addition and removal of tasks from the list. + +It is possible that when a thread state is deallocated, there are +lingering tasks in its list; this can happen if another thread has +references to the tasks of this thread. Therefore, the +`PyInterpreterState` structure also gains a new `asyncio_tasks_head` +field to store any lingering tasks. When a thread state is +deallocated, any remaining lingering tasks are moved to the +interpreter state tasks list, and the thread state tasks list is +cleared. The `asyncio_tasks_lock` is used protect the interpreter's +tasks list from concurrent modifications. + +```c +typedef struct TaskObj { + ... + struct llist_node asyncio_node; +} TaskObj; + +typedef struct PyThreadState { + ... + struct llist_node asyncio_tasks_head; +} PyThreadState; + +typedef struct PyInterpreterState { + ... + struct llist_node asyncio_tasks_head; + PyMutex asyncio_tasks_lock; +} PyInterpreterState; +``` + +When a task is created, it is added to the current thread's list of +tasks by the `register_task` function. When the task is done, it is +removed from the list by the `unregister_task` function. In +free-threading, the thread id of the thread which created the task is +stored in `task_tid` field of the `TaskObj`. This is used to check if +the task is being removed from the correct thread's task list. If the +current thread is same as the thread which created it then no locking +is required, otherwise in free-threading, the `stop-the-world` pause +is used to pause all other threads and then safely remove the task +from the tasks list. + +```mermaid + +flowchart TD + subgraph one["Executing Thread"] + A["task = asyncio.create_task(coro())"] -->B("register_task(task)") + B --> C{"task->task_state?"} + C -->|pending| D["task_step(task)"] + C -->|done| F["unregister_task(task)"] + C -->|cancelled| F["unregister_task(task)"] + D --> C + F --> G{"free-threading?"} + G --> |false| H["unregister_task_safe(task)"] + G --> |true| J{"correct thread?
task->task_tid == _Py_ThreadId()"} + J --> |true| H + J --> |false| I["stop the world
pause all threads"] + I --> H["unregister_task_safe(task)"] + end + subgraph two["Thread deallocating"] + A1{"thread's task list empty?
llist_empty(tstate->asyncio_tasks_head)"} + A1 --> |true| B1["deallocate thread
free_threadstate(tstate)"] + A1 --> |false| C1["add tasks to interpreter's task list
llist_concat(&tstate->interp->asyncio_tasks_head,tstate->asyncio_tasks_head)"] + C1 --> B1 + end + + one --> two +``` + +`asyncio.all_tasks` now iterates over the per-thread task lists of all +threads and the interpreter's task list to get all the tasks. In +free-threading, this is done by pausing all the threads using the +`stop-the-world` pause to ensure that no tasks are being added or +removed while iterating over the lists. This allows for a consistent +view of all task lists across all threads and is thread safe. + +This design allows for lock free execution and scales well in +free-threading with multiple event loops running in different threads. + +## Per-thread current task + +This implementation stores the current task in the `PyThreadState` +structure, which allows for faster access to the current task without +the need for a dictionary lookup. + +```c +typedef struct PyThreadState { + ... + PyObject *asyncio_current_loop; + PyObject *asyncio_current_task; +} PyThreadState; +``` + +When a task is entered or left, the current task is updated in the +thread state using `enter_task` and `leave_task` functions. When +`current_task(loop)` is called where `loop` is the current running +event loop of the current thread, no locking is required as the +current task is stored in the thread state and is returned directly +(general case). Otherwise, if the `loop` is not current running event +loop, the `stop-the-world` pause is used to pause all threads in +free-threading and then by iterating over all the thread states and +checking if the `loop` matches with `tstate->asyncio_current_loop`, +the current task is found and returned. If no matching thread state is +found, `None` is returned. + +In free-threading, it avoids contention on a global dictionary as +threads can access the current task of thier running loop without any +locking. + + +[^1]: https://github.com/python/cpython/issues/123089 +[^2]: https://github.com/python/cpython/issues/80788 \ No newline at end of file From 52be7f445e454ccb44e368a22fe70a0fa6cab7c0 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 18 Jun 2025 08:36:02 +0900 Subject: [PATCH 592/989] gh-133931: Introduce _PyObject_XSetRefDelayed to replace Py_XSETREF (gh-134377) --- Include/internal/pycore_object.h | 21 ++++++++ Include/internal/pycore_pymem.h | 10 ---- .../test_free_threading/test_generators.py | 51 +++++++++++++++++++ Objects/genobject.c | 18 +++++-- Objects/obmalloc.c | 15 ++++++ Objects/typeobject.c | 10 ++-- 6 files changed, 104 insertions(+), 21 deletions(-) create mode 100644 Lib/test/test_free_threading/test_generators.py diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 50225623fe52db..50807e68e9a70c 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -767,6 +767,27 @@ _Py_TryIncref(PyObject *op) #endif } +// Enqueue an object to be freed possibly after some delay +#ifdef Py_GIL_DISABLED +PyAPI_FUNC(void) _PyObject_XDecRefDelayed(PyObject *obj); +#else +static inline void _PyObject_XDecRefDelayed(PyObject *obj) +{ + Py_XDECREF(obj); +} +#endif + +#ifdef Py_GIL_DISABLED +// Same as `Py_XSETREF` but in free-threading, it stores the object atomically +// and queues the old object to be decrefed at a safe point using QSBR. +PyAPI_FUNC(void) _PyObject_XSetRefDelayed(PyObject **p_obj, PyObject *obj); +#else +static inline void _PyObject_XSetRefDelayed(PyObject **p_obj, PyObject *obj) +{ + Py_XSETREF(*p_obj, obj); +} +#endif + #ifdef Py_REF_DEBUG extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *); extern void _Py_FinalizeRefTotal(_PyRuntimeState *); diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 02537bdfef8598..e669a30b072d18 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -90,16 +90,6 @@ extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. extern void _PyMem_FreeDelayed(void *ptr); -// Enqueue an object to be freed possibly after some delay -#ifdef Py_GIL_DISABLED -PyAPI_FUNC(void) _PyObject_XDecRefDelayed(PyObject *obj); -#else -static inline void _PyObject_XDecRefDelayed(PyObject *obj) -{ - Py_XDECREF(obj); -} -#endif - // Periodically process delayed free requests. extern void _PyMem_ProcessDelayed(PyThreadState *tstate); diff --git a/Lib/test/test_free_threading/test_generators.py b/Lib/test/test_free_threading/test_generators.py new file mode 100644 index 00000000000000..d01675eb38b370 --- /dev/null +++ b/Lib/test/test_free_threading/test_generators.py @@ -0,0 +1,51 @@ +import concurrent.futures +import unittest +from threading import Barrier +from unittest import TestCase +import random +import time + +from test.support import threading_helper, Py_GIL_DISABLED + +threading_helper.requires_working_threading(module=True) + + +def random_sleep(): + delay_us = random.randint(50, 100) + time.sleep(delay_us * 1e-6) + +def random_string(): + return ''.join(random.choice('0123456789ABCDEF') for _ in range(10)) + +def set_gen_name(g, b): + b.wait() + random_sleep() + g.__name__ = random_string() + return g.__name__ + +def set_gen_qualname(g, b): + b.wait() + random_sleep() + g.__qualname__ = random_string() + return g.__qualname__ + + +@unittest.skipUnless(Py_GIL_DISABLED, "Enable only in FT build") +class TestFTGenerators(TestCase): + NUM_THREADS = 4 + + def concurrent_write_with_func(self, func): + gen = (x for x in range(42)) + for j in range(1000): + with concurrent.futures.ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor: + b = Barrier(self.NUM_THREADS) + futures = {executor.submit(func, gen, b): i for i in range(self.NUM_THREADS)} + for fut in concurrent.futures.as_completed(futures): + gen_name = fut.result() + self.assertEqual(len(gen_name), 10) + + def test_concurrent_write(self): + with self.subTest(func=set_gen_name): + self.concurrent_write_with_func(func=set_gen_name) + with self.subTest(func=set_gen_qualname): + self.concurrent_write_with_func(func=set_gen_qualname) diff --git a/Objects/genobject.c b/Objects/genobject.c index da1462deaaa02c..d0cb75d2d17b07 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -704,7 +704,8 @@ static PyObject * gen_get_name(PyObject *self, void *Py_UNUSED(ignored)) { PyGenObject *op = _PyGen_CAST(self); - return Py_NewRef(op->gi_name); + PyObject *name = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_name); + return Py_NewRef(name); } static int @@ -718,7 +719,11 @@ gen_set_name(PyObject *self, PyObject *value, void *Py_UNUSED(ignored)) "__name__ must be set to a string object"); return -1; } - Py_XSETREF(op->gi_name, Py_NewRef(value)); + Py_BEGIN_CRITICAL_SECTION(self); + // gh-133931: To prevent use-after-free from other threads that reference + // the gi_name. + _PyObject_XSetRefDelayed(&op->gi_name, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); return 0; } @@ -726,7 +731,8 @@ static PyObject * gen_get_qualname(PyObject *self, void *Py_UNUSED(ignored)) { PyGenObject *op = _PyGen_CAST(self); - return Py_NewRef(op->gi_qualname); + PyObject *qualname = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_qualname); + return Py_NewRef(qualname); } static int @@ -740,7 +746,11 @@ gen_set_qualname(PyObject *self, PyObject *value, void *Py_UNUSED(ignored)) "__qualname__ must be set to a string object"); return -1; } - Py_XSETREF(op->gi_qualname, Py_NewRef(value)); + Py_BEGIN_CRITICAL_SECTION(self); + // gh-133931: To prevent use-after-free from other threads that reference + // the gi_qualname. + _PyObject_XSetRefDelayed(&op->gi_qualname, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); return 0; } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d3931aab623b70..d4b8327cb73b37 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1231,6 +1231,21 @@ _PyObject_XDecRefDelayed(PyObject *ptr) } #endif +#ifdef Py_GIL_DISABLED +void +_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value) +{ + PyObject *old = *ptr; + FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value); + if (old == NULL) { + return; + } + if (!_Py_IsImmortal(old)) { + _PyObject_XDecRefDelayed(old); + } +} +#endif + static struct _mem_work_chunk * work_queue_first(struct llist_node *head) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index db923c164774b7..b9d549610693c1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3967,13 +3967,9 @@ _PyObject_SetDict(PyObject *obj, PyObject *value) return -1; } Py_BEGIN_CRITICAL_SECTION(obj); - PyObject *olddict = *dictptr; - FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); -#ifdef Py_GIL_DISABLED - _PyObject_XDecRefDelayed(olddict); -#else - Py_XDECREF(olddict); -#endif + // gh-133980: To prevent use-after-free from other threads that reference + // the __dict__ + _PyObject_XSetRefDelayed(dictptr, Py_NewRef(value)); Py_END_CRITICAL_SECTION(); return 0; } From 711700259135b5f9e21c56b199f4ebc0048b18b4 Mon Sep 17 00:00:00 2001 From: Vladyslav Lazoryk <80263725+lazorikv@users.noreply.github.com> Date: Wed, 18 Jun 2025 05:39:41 +0300 Subject: [PATCH 593/989] gh-135627: Remove documentation for LOAD_CONST_IMMORTAL opcode (GH-135632) Remove documentation for LOAD_CONST_IMMORTAL opcode --- Doc/library/dis.rst | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 44767b5dd2d5c9..11685a32f48e4f 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1094,14 +1094,6 @@ iterations of the loop. .. versionadded:: 3.14 -.. opcode:: LOAD_CONST_IMMORTAL (consti) - - Pushes ``co_consts[consti]`` onto the stack. - Can be used when the constant value is known to be immortal. - - .. versionadded:: 3.14 - - .. opcode:: LOAD_NAME (namei) Pushes the value associated with ``co_names[namei]`` onto the stack. From 28c71ee4b2eb66983b78018896ca56892580816a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 17 Jun 2025 19:51:46 -0700 Subject: [PATCH 594/989] gh-127146: Allow ignored keys to be missing in test_sysconfig (#135622) Fixes the test on Emscripten where userbase can be missing. --- Lib/test/test_sysconfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 2c0df9376abfc6..2eb8de4b29fe96 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -711,8 +711,8 @@ def test_sysconfigdata_json(self): ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase'} for key in ignore_keys: - json_config_vars.pop(key) - system_config_vars.pop(key) + json_config_vars.pop(key, None) + system_config_vars.pop(key, None) self.assertEqual(system_config_vars, json_config_vars) From 2a49c54ab2c5cf0ee96a8e577971c1525a966940 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 17 Jun 2025 19:59:01 -0700 Subject: [PATCH 595/989] gh-127146: Emscripten: Skip test_url2pathname_resolve_host (#135634) Emscripten currently `gethostbyname_r()` returns an incorrect IP address for `localhost`. Will be resolved by upstream PR: https://github.com/emscripten-core/emscripten/pull/24593 --- Lib/test/test_urllib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index bc1030eea60c35..1d889ae7cf458f 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1569,6 +1569,7 @@ def test_url2pathname_require_scheme_errors(self): urllib.request.url2pathname, url, require_scheme=True) + @unittest.skipIf(support.is_emscripten, "Fixed by https://github.com/emscripten-core/emscripten/pull/24593") def test_url2pathname_resolve_host(self): fn = urllib.request.url2pathname sep = os.path.sep From ce58afb400672aeec79f2017ab99acbbc65cb4ab Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 17 Jun 2025 20:19:23 -0700 Subject: [PATCH 596/989] gh-127146: Emscripten: Fix test failure due to missing os.link (#135626) Check for existence of os.link, rather than assuming it exists. --- Lib/test/test_inspect/test_inspect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index e584fb417b9d54..79eb103224b956 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -5875,9 +5875,9 @@ def test_operator_module_has_signatures(self): self._test_module_has_signatures(operator) def test_os_module_has_signatures(self): - unsupported_signature = {'chmod', 'link', 'utime'} + unsupported_signature = {'chmod', 'utime'} unsupported_signature |= {name for name in - ['get_terminal_size', 'posix_spawn', 'posix_spawnp', + ['get_terminal_size', 'link', 'posix_spawn', 'posix_spawnp', 'register_at_fork', 'startfile'] if hasattr(os, name)} self._test_module_has_signatures(os, unsupported_signature=unsupported_signature) From e4ccd46bf75fff2938d7c21c7284e49b0ab795b0 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 17 Jun 2025 20:20:43 -0700 Subject: [PATCH 597/989] gh-127146: Emscripten: Fix pathlib glob_dotdot test (#135624) The Emscripten path resolver uses the same mechanism for resolving `..` at a file system root as for resolving symlinks. This is because roots don't store their mountpoints. If the parent of a node is itself, it is a root but it might be a mountpoint in some other file system. If a path has enough `..`'s at the root, it will return ELOOP. Enough turns out to be 49. --- Lib/test/test_pathlib/test_pathlib.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 13356b4cfe082b..b2e2cdb3338beb 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -2954,7 +2954,13 @@ def test_glob_dotdot(self): else: # ".." segments are normalized first on Windows, so this path is stat()able. self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + if sys.platform == "emscripten": + # Emscripten will return ELOOP if there are 49 or more ..'s. + # Can remove when https://github.com/emscripten-core/emscripten/pull/24591 is merged. + NDOTDOTS = 48 + else: + NDOTDOTS = 50 + self.assertEqual(set(p.glob("/".join([".."] * NDOTDOTS))), { P(self.base, *[".."] * NDOTDOTS)}) def test_glob_inaccessible(self): P = self.cls From 504ae606e192a88805996a7e934f38bc5cb7f28a Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Wed, 18 Jun 2025 05:29:13 +0200 Subject: [PATCH 598/989] gh-119180: Only fetch globals and locals if necessary in `annotationlib.get_annotations()` (#135644) --- Lib/annotationlib.py | 83 ++++++++++--------- ...-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst | 2 + 2 files changed, 44 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index a7dfb91515a1c4..731817a9973799 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -939,48 +939,49 @@ def get_annotations( if not eval_str: return dict(ann) - if isinstance(obj, type): - # class - obj_globals = None - module_name = getattr(obj, "__module__", None) - if module_name: - module = sys.modules.get(module_name, None) - if module: - obj_globals = getattr(module, "__dict__", None) - obj_locals = dict(vars(obj)) - unwrap = obj - elif isinstance(obj, types.ModuleType): - # module - obj_globals = getattr(obj, "__dict__") - obj_locals = None - unwrap = None - elif callable(obj): - # this includes types.Function, types.BuiltinFunctionType, - # types.BuiltinMethodType, functools.partial, functools.singledispatch, - # "class funclike" from Lib/test/test_inspect... on and on it goes. - obj_globals = getattr(obj, "__globals__", None) - obj_locals = None - unwrap = obj - else: - obj_globals = obj_locals = unwrap = None - - if unwrap is not None: - while True: - if hasattr(unwrap, "__wrapped__"): - unwrap = unwrap.__wrapped__ - continue - if functools := sys.modules.get("functools"): - if isinstance(unwrap, functools.partial): - unwrap = unwrap.func + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ continue - break - if hasattr(unwrap, "__globals__"): - obj_globals = unwrap.__globals__ - - if globals is None: - globals = obj_globals - if locals is None: - locals = obj_locals + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ + + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals # "Inject" type parameters into the local namespace # (unless they are shadowed by assignments *in* the local namespace), diff --git a/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst b/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst new file mode 100644 index 00000000000000..c5e5d5b4f8d85e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst @@ -0,0 +1,2 @@ +Only fetch globals and locals if necessary in +:func:`annotationlib.get_annotations` From 01c80b265060f016d3534eb74d540363808804e1 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 17 Jun 2025 21:34:30 -0700 Subject: [PATCH 599/989] gh-127146: Enable large files on Emscripten (#135635) Large files have been fully supported by Emscripten for a long time. --- configure | 7 ------- configure.ac | 4 ---- 2 files changed, 11 deletions(-) diff --git a/configure b/configure index 2b1482d9264d24..8334e247c3e1bb 100755 --- a/configure +++ b/configure @@ -12979,13 +12979,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \ else have_largefile_support="no" fi -case $ac_sys_system in #( - Emscripten) : - have_largefile_support="no" - ;; #( - *) : - ;; -esac if test "x$have_largefile_support" = xyes then : diff --git a/configure.ac b/configure.ac index 7dbb332f1d30a7..82fb2d8c492d48 100644 --- a/configure.ac +++ b/configure.ac @@ -3172,10 +3172,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \ else have_largefile_support="no" fi -dnl LFS does not work with Emscripten 3.1 -AS_CASE([$ac_sys_system], - [Emscripten], [have_largefile_support="no"] -) AS_VAR_IF([have_largefile_support], [yes], [ AC_DEFINE([HAVE_LARGEFILE_SUPPORT], [1], [Defined to enable large file support when an off_t is bigger than a long From 1c7efaf58a62d848421b2da97360ba3df7d7856b Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Wed, 18 Jun 2025 14:21:14 +0800 Subject: [PATCH 600/989] gh-134632: Add iOS/Android test skip for C API check for headers. (#135656) iOS and Android don't ship headers in the testbed, so we can't test for their existence. --- Lib/test/test_build_details.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_build_details.py b/Lib/test/test_build_details.py index 33ade161fb5058..ba4b8c5aa9b58e 100644 --- a/Lib/test/test_build_details.py +++ b/Lib/test/test_build_details.py @@ -117,13 +117,20 @@ def test_location(self): # Override generic format tests with tests for our specific implemenation. @needs_installed_python - @unittest.skipIf(is_android or is_apple_mobile, 'Android and iOS run tests via a custom testbed method that changes sys.executable') + @unittest.skipIf( + is_android or is_apple_mobile, + 'Android and iOS run tests via a custom testbed method that changes sys.executable' + ) def test_base_interpreter(self): value = self.key('base_interpreter') self.assertEqual(os.path.realpath(value), os.path.realpath(sys.executable)) @needs_installed_python + @unittest.skipIf( + is_android or is_apple_mobile, + "Android and iOS run tests via a custom testbed method that doesn't ship headers" + ) def test_c_api(self): value = self.key('c_api') self.assertTrue(os.path.exists(os.path.join(value['headers'], 'Python.h'))) From 5f6ab924653a44e08be23710d5023566e9e9214e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Wed, 18 Jun 2025 11:56:49 +0100 Subject: [PATCH 601/989] gh-133390: Document `SQLITE_KEYWORDS` (GH-135659) --- Doc/library/sqlite3.rst | 9 +++++++++ .../2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 12761baf792a6b..641e1f1de03a1d 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -507,6 +507,15 @@ Module constants Version number of the runtime SQLite library as a :class:`tuple` of :class:`integers `. +.. data:: SQLITE_KEYWORDS + + A :class:`tuple` containing all sqlite3 keywords. + + This constant is only available if Python was compiled with SQLite + 3.24.0 or greater. + + .. versionadded:: next + .. data:: threadsafety Integer constant required by the DB-API 2.0, stating the level of thread diff --git a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst index 38d5c311b1d437..943e4addebcc9e 100644 --- a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst +++ b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst @@ -1 +1,2 @@ -Support keyword completion in the :mod:`sqlite3` command-line interface. +Support keyword completion in the :mod:`sqlite3` command-line interface and add +:data:`sqlite3.SQLITE_KEYWORDS` constant. From 9877d191f441741fc27ae5e7a6dd7ab6d4bcc6b7 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 19 Jun 2025 00:17:02 +1200 Subject: [PATCH 602/989] gh-135335: flush stdout/stderr in forkserver after preloading modules (#135338) If a preloaded module writes to stdout or stderr, and the stream is buffered, child processes will inherit the buffered data after forking. Attempt to prevent this by flushing the streams after preload. Co-authored-by: Mikhail Efimov Co-authored-by: Victor Stinner --- Lib/multiprocessing/forkserver.py | 4 +++ Lib/test/_test_multiprocessing.py | 29 +++++++++++++++++++ Lib/test/mp_preload_flush.py | 15 ++++++++++ ...-06-10-21-42-04.gh-issue-135335.WnUqb_.rst | 2 ++ 4 files changed, 50 insertions(+) create mode 100644 Lib/test/mp_preload_flush.py create mode 100644 Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst diff --git a/Lib/multiprocessing/forkserver.py b/Lib/multiprocessing/forkserver.py index 681af2610e9b37..c91891ff162c2d 100644 --- a/Lib/multiprocessing/forkserver.py +++ b/Lib/multiprocessing/forkserver.py @@ -222,6 +222,10 @@ def main(listener_fd, alive_r, preload, main_path=None, sys_path=None, except ImportError: pass + # gh-135335: flush stdout/stderr in case any of the preloaded modules + # wrote to them, otherwise children might inherit buffered data + util._flush_std_streams() + util._close_stdin() sig_r, sig_w = os.pipe() diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 75f31d858d3306..a1259ff1d63d18 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -6801,6 +6801,35 @@ def test_child_sys_path(self): self.assertEqual(child_sys_path[1:], sys.path[1:]) self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}") + def test_std_streams_flushed_after_preload(self): + # gh-135335: Check fork server flushes standard streams after + # preloading modules + if multiprocessing.get_start_method() != "forkserver": + self.skipTest("forkserver specific test") + + # Create a test module in the temporary directory on the child's path + # TODO: This can all be simplified once gh-126631 is fixed and we can + # use __main__ instead of a module. + dirname = os.path.join(self._temp_dir, 'preloaded_module') + init_name = os.path.join(dirname, '__init__.py') + os.mkdir(dirname) + with open(init_name, "w") as f: + cmd = '''if 1: + import sys + print('stderr', end='', file=sys.stderr) + print('stdout', end='', file=sys.stdout) + ''' + f.write(cmd) + + name = os.path.join(os.path.dirname(__file__), 'mp_preload_flush.py') + env = {'PYTHONPATH': self._temp_dir} + _, out, err = test.support.script_helper.assert_python_ok(name, **env) + + # Check stderr first, as it is more likely to be useful to see in the + # event of a failure. + self.assertEqual(err.decode().rstrip(), 'stderr') + self.assertEqual(out.decode().rstrip(), 'stdout') + class MiscTestCase(unittest.TestCase): def test__all__(self): diff --git a/Lib/test/mp_preload_flush.py b/Lib/test/mp_preload_flush.py new file mode 100644 index 00000000000000..3501554d366a21 --- /dev/null +++ b/Lib/test/mp_preload_flush.py @@ -0,0 +1,15 @@ +import multiprocessing +import sys + +modname = 'preloaded_module' +if __name__ == '__main__': + if modname in sys.modules: + raise AssertionError(f'{modname!r} is not in sys.modules') + multiprocessing.set_start_method('forkserver') + multiprocessing.set_forkserver_preload([modname]) + for _ in range(2): + p = multiprocessing.Process() + p.start() + p.join() +elif modname not in sys.modules: + raise AssertionError(f'{modname!r} is not in sys.modules') diff --git a/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst b/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst new file mode 100644 index 00000000000000..466ba0d232cd1f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst @@ -0,0 +1,2 @@ +:mod:`multiprocessing`: Flush ``stdout`` and ``stderr`` after preloading +modules in the ``forkserver``. From 343719d98e60d28d6102002f8ad3fd9dc5a58bd1 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Wed, 18 Jun 2025 15:00:55 +0200 Subject: [PATCH 603/989] gh-135646: Raise consistent `NameError` exceptions in `ForwardRef.evaluate()` (#135663) --- Lib/annotationlib.py | 5 ++++- Lib/test/test_annotationlib.py | 4 +++- .../Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 731817a9973799..c83a1573ccd3d1 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -27,6 +27,9 @@ class Format(enum.IntEnum): _sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + # Slots shared by ForwardRef and _Stringifier. The __forward__ names must be # preserved for compatibility with the old typing.ForwardRef class. The remaining @@ -184,7 +187,7 @@ def evaluate( elif is_forwardref_format: return self else: - raise NameError(arg) + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) else: code = self.__forward_code__ try: diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index fe091e52a86dc4..ae0e73f08c5bd0 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -1650,9 +1650,11 @@ def test_name_lookup_without_eval(self): with support.swap_attr(builtins, "int", dict): self.assertIs(ForwardRef("int").evaluate(), dict) - with self.assertRaises(NameError): + with self.assertRaises(NameError, msg="name 'doesntexist' is not defined") as exc: ForwardRef("doesntexist").evaluate() + self.assertEqual(exc.exception.name, "doesntexist") + def test_fwdref_invalid_syntax(self): fr = ForwardRef("if") with self.assertRaises(SyntaxError): diff --git a/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst b/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst new file mode 100644 index 00000000000000..5fbd751467dd58 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst @@ -0,0 +1 @@ +Raise consistent :exc:`NameError` exceptions in :func:`annotationlib.ForwardRef.evaluate` From 21f3d15534c08d9a49d5c119a0e690855173fde4 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 18 Jun 2025 16:34:18 +0200 Subject: [PATCH 604/989] gh-135676: lexical analysis: Improve section on Numeric literals (GH-134850) --- Doc/reference/datamodel.rst | 2 + Doc/reference/expressions.rst | 3 +- Doc/reference/lexical_analysis.rst | 218 ++++++++++++++++++++++------- 3 files changed, 168 insertions(+), 55 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 32a2e266262c52..4a099e81daccb3 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -262,6 +262,8 @@ Booleans (:class:`bool`) a string, the strings ``"False"`` or ``"True"`` are returned, respectively. +.. _datamodel-float: + :class:`numbers.Real` (:class:`float`) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 3d3bf1d9840eef..429b3cd1f006a2 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -134,8 +134,7 @@ Literals Python supports string and bytes literals and various numeric literals: .. productionlist:: python-grammar - literal: `stringliteral` | `bytesliteral` - : | `integer` | `floatnumber` | `imagnumber` + literal: `stringliteral` | `bytesliteral` | `NUMBER` Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index b22eb4db7945d1..567c70111c20ec 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -922,11 +922,20 @@ Numeric literals floating-point literal, hexadecimal literal octal literal, binary literal, decimal literal, imaginary literal, complex literal -There are three types of numeric literals: integers, floating-point numbers, and -imaginary numbers. There are no complex literals (complex numbers can be formed -by adding a real number and an imaginary number). +:data:`~token.NUMBER` tokens represent numeric literals, of which there are +three types: integers, floating-point numbers, and imaginary numbers. -Note that numeric literals do not include a sign; a phrase like ``-1`` is +.. grammar-snippet:: + :group: python-grammar + + NUMBER: `integer` | `floatnumber` | `imagnumber` + +The numeric value of a numeric literal is the same as if it were passed as a +string to the :class:`int`, :class:`float` or :class:`complex` class +constructor, respectively. +Note that not all valid inputs for those constructors are also valid literals. + +Numeric literals do not include a sign; a phrase like ``-1`` is actually an expression composed of the unary operator '``-``' and the literal ``1``. @@ -940,38 +949,67 @@ actually an expression composed of the unary operator '``-``' and the literal .. _integers: Integer literals ----------------- +^^^^^^^^^^^^^^^^ -Integer literals are described by the following lexical definitions: +Integer literals denote whole numbers. For example:: -.. productionlist:: python-grammar - integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` - decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")* - bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ - octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ - hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ - nonzerodigit: "1"..."9" - digit: "0"..."9" - bindigit: "0" | "1" - octdigit: "0"..."7" - hexdigit: `digit` | "a"..."f" | "A"..."F" + 7 + 3 + 2147483647 There is no limit for the length of integer literals apart from what can be -stored in available memory. +stored in available memory:: + + 7922816251426433759354395033679228162514264337593543950336 + +Underscores can be used to group digits for enhanced readability, +and are ignored for determining the numeric value of the literal. +For example, the following literals are equivalent:: + + 100_000_000_000 + 100000000000 + 1_00_00_00_00_000 + +Underscores can only occur between digits. +For example, ``_123``, ``321_``, and ``123__321`` are *not* valid literals. -Underscores are ignored for determining the numeric value of the literal. They -can be used to group digits for enhanced readability. One underscore can occur -between digits, and after base specifiers like ``0x``. +Integers can be specified in binary (base 2), octal (base 8), or hexadecimal +(base 16) using the prefixes ``0b``, ``0o`` and ``0x``, respectively. +Hexadecimal digits 10 through 15 are represented by letters ``A``-``F``, +case-insensitive. For example:: -Note that leading zeros in a non-zero decimal number are not allowed. This is -for disambiguation with C-style octal literals, which Python used before version -3.0. + 0b100110111 + 0b_1110_0101 + 0o177 + 0o377 + 0xdeadbeef + 0xDead_Beef -Some examples of integer literals:: +An underscore can follow the base specifier. +For example, ``0x_1f`` is a valid literal, but ``0_x1f`` and ``0x__1f`` are +not. - 7 2147483647 0o177 0b100110111 - 3 79228162514264337593543950336 0o377 0xdeadbeef - 100_000_000_000 0b_1110_0101 +Leading zeros in a non-zero decimal number are not allowed. +For example, ``0123`` is not a valid literal. +This is for disambiguation with C-style octal literals, which Python used +before version 3.0. + +Formally, integer literals are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` | `zerointeger` + decinteger: `nonzerodigit` (["_"] `digit`)* + bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ + octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ + hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ + zerointeger: "0"+ (["_"] "0")* + nonzerodigit: "1"..."9" + digit: "0"..."9" + bindigit: "0" | "1" + octdigit: "0"..."7" + hexdigit: `digit` | "a"..."f" | "A"..."F" .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. @@ -984,26 +1022,58 @@ Some examples of integer literals:: .. _floating: Floating-point literals ------------------------ +^^^^^^^^^^^^^^^^^^^^^^^ -Floating-point literals are described by the following lexical definitions: +Floating-point (float) literals, such as ``3.14`` or ``1.5``, denote +:ref:`approximations of real numbers `. -.. productionlist:: python-grammar - floatnumber: `pointfloat` | `exponentfloat` - pointfloat: [`digitpart`] `fraction` | `digitpart` "." - exponentfloat: (`digitpart` | `pointfloat`) `exponent` - digitpart: `digit` (["_"] `digit`)* - fraction: "." `digitpart` - exponent: ("e" | "E") ["+" | "-"] `digitpart` +They consist of *integer* and *fraction* parts, each composed of decimal digits. +The parts are separated by a decimal point, ``.``:: + + 2.71828 + 4.0 + +Unlike in integer literals, leading zeros are allowed in the numeric parts. +For example, ``077.010`` is legal, and denotes the same number as ``77.10``. + +As in integer literals, single underscores may occur between digits to help +readability:: + + 96_485.332_123 + 3.14_15_93 -Note that the integer and exponent parts are always interpreted using radix 10. -For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The -allowed range of floating-point literals is implementation-dependent. As in -integer literals, underscores are supported for digit grouping. +Either of these parts, but not both, can be empty. For example:: -Some examples of floating-point literals:: + 10. # (equivalent to 10.0) + .001 # (equivalent to 0.001) - 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 +Optionally, the integer and fraction may be followed by an *exponent*: +the letter ``e`` or ``E``, followed by an optional sign, ``+`` or ``-``, +and a number in the same format as the integer and fraction parts. +The ``e`` or ``E`` represents "times ten raised to the power of":: + + 1.0e3 # (represents 1.0×10³, or 1000.0) + 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) + 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) + +In floats with only integer and exponent parts, the decimal point may be +omitted:: + + 1e3 # (equivalent to 1.e3 and 1.0e3) + 0e0 # (equivalent to 0.) + +Formally, floating-point literals are described by the following +lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + floatnumber: + | `digitpart` "." [`digitpart`] [`exponent`] + | "." `digitpart` [`exponent`] + | `digitpart` `exponent` + digitpart: `digit` (["_"] `digit`)* + exponent: ("e" | "E") ["+" | "-"] `digitpart` .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. @@ -1014,20 +1084,62 @@ Some examples of floating-point literals:: .. _imaginary: Imaginary literals ------------------- +^^^^^^^^^^^^^^^^^^ -Imaginary literals are described by the following lexical definitions: +Python has :ref:`complex number ` objects, but no complex +literals. +Instead, *imaginary literals* denote complex numbers with a zero +real part. -.. productionlist:: python-grammar - imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") +For example, in math, the complex number 3+4.2\ *i* is written +as the real number 3 added to the imaginary number 4.2\ *i*. +Python uses a similar syntax, except the imaginary unit is written as ``j`` +rather than *i*:: + + 3+4.2j + +This is an expression composed +of the :ref:`integer literal ` ``3``, +the :ref:`operator ` '``+``', +and the :ref:`imaginary literal ` ``4.2j``. +Since these are three separate tokens, whitespace is allowed between them:: -An imaginary literal yields a complex number with a real part of 0.0. Complex -numbers are represented as a pair of floating-point numbers and have the same -restrictions on their range. To create a complex number with a nonzero real -part, add a floating-point number to it, e.g., ``(3+4j)``. Some examples of -imaginary literals:: + 3 + 4.2j - 3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j +No whitespace is allowed *within* each token. +In particular, the ``j`` suffix, may not be separated from the number +before it. + +The number before the ``j`` has the same syntax as a floating-point literal. +Thus, the following are valid imaginary literals:: + + 4.2j + 3.14j + 10.j + .001j + 1e100j + 3.14e-10j + 3.14_15_93j + +Unlike in a floating-point literal the decimal point can be omitted if the +imaginary number only has an integer part. +The number is still evaluated as a floating-point number, not an integer:: + + 10j + 0j + 1000000000000000000000000j # equivalent to 1e+24j + +The ``j`` suffix is case-insensitive. +That means you can use ``J`` instead:: + + 3.14J # equivalent to 3.14j + +Formally, imaginary literals are described by the following lexical definition: + +.. grammar-snippet:: + :group: python-grammar + + imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") .. _operators: From c55512311b7cb8b7c27c19f56cd8f872be29aedc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 18 Jun 2025 18:26:01 +0300 Subject: [PATCH 605/989] gh-135376: Fix and improve test_random (GH-135377) * Remove duplicated code. Tests for Random and SystemRandom now share the code. * Move implementation agnostic tests that was only run for SystemRandom, so they are now run for Random too. * Add tests for __index__() support. * Add tests for randint(). --- Lib/test/test_random.py | 295 ++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 161 deletions(-) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 31ebcb3b8b0b00..0217ebd132b110 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -151,6 +151,7 @@ def test_sample(self): # Exception raised if size of sample exceeds that of population self.assertRaises(ValueError, self.gen.sample, population, N+1) self.assertRaises(ValueError, self.gen.sample, [], -1) + self.assertRaises(TypeError, self.gen.sample, population, 1.0) def test_sample_distribution(self): # For the entire allowable range of 0 <= k <= N, validate that @@ -268,6 +269,7 @@ def test_choices(self): choices(data, range(4), k=5), choices(k=5, population=data, weights=range(4)), choices(k=5, population=data, cum_weights=range(4)), + choices(data, k=MyIndex(5)), ]: self.assertEqual(len(sample), 5) self.assertEqual(type(sample), list) @@ -378,122 +380,40 @@ def test_gauss(self): self.assertEqual(x1, x2) self.assertEqual(y1, y2) + @support.requires_IEEE_754 + def test_53_bits_per_float(self): + span = 2 ** 53 + cum = 0 + for i in range(100): + cum |= int(self.gen.random() * span) + self.assertEqual(cum, span-1) + def test_getrandbits(self): + getrandbits = self.gen.getrandbits # Verify ranges for k in range(1, 1000): - self.assertTrue(0 <= self.gen.getrandbits(k) < 2**k) - self.assertEqual(self.gen.getrandbits(0), 0) + self.assertTrue(0 <= getrandbits(k) < 2**k) + self.assertEqual(getrandbits(0), 0) # Verify all bits active - getbits = self.gen.getrandbits for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]: all_bits = 2**span-1 cum = 0 cpl_cum = 0 for i in range(100): - v = getbits(span) + v = getrandbits(span) cum |= v cpl_cum |= all_bits ^ v self.assertEqual(cum, all_bits) self.assertEqual(cpl_cum, all_bits) # Verify argument checking - self.assertRaises(TypeError, self.gen.getrandbits) - self.assertRaises(TypeError, self.gen.getrandbits, 1, 2) - self.assertRaises(ValueError, self.gen.getrandbits, -1) - self.assertRaises(OverflowError, self.gen.getrandbits, 1<<1000) - self.assertRaises(ValueError, self.gen.getrandbits, -1<<1000) - self.assertRaises(TypeError, self.gen.getrandbits, 10.1) - - def test_pickling(self): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - state = pickle.dumps(self.gen, proto) - origseq = [self.gen.random() for i in range(10)] - newgen = pickle.loads(state) - restoredseq = [newgen.random() for i in range(10)] - self.assertEqual(origseq, restoredseq) - - def test_bug_1727780(self): - # verify that version-2-pickles can be loaded - # fine, whether they are created on 32-bit or 64-bit - # platforms, and that version-3-pickles load fine. - files = [("randv2_32.pck", 780), - ("randv2_64.pck", 866), - ("randv3.pck", 343)] - for file, value in files: - with open(support.findfile(file),"rb") as f: - r = pickle.load(f) - self.assertEqual(int(r.random()*1000), value) - - def test_bug_9025(self): - # Had problem with an uneven distribution in int(n*random()) - # Verify the fix by checking that distributions fall within expectations. - n = 100000 - randrange = self.gen.randrange - k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n)) - self.assertTrue(0.30 < k/n < .37, (k/n)) - - def test_randbytes(self): - # Verify ranges - for n in range(1, 10): - data = self.gen.randbytes(n) - self.assertEqual(type(data), bytes) - self.assertEqual(len(data), n) - - self.assertEqual(self.gen.randbytes(0), b'') - - # Verify argument checking - self.assertRaises(TypeError, self.gen.randbytes) - self.assertRaises(TypeError, self.gen.randbytes, 1, 2) - self.assertRaises(ValueError, self.gen.randbytes, -1) - self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000) - self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000) - self.assertRaises(TypeError, self.gen.randbytes, 1.0) - - def test_mu_sigma_default_args(self): - self.assertIsInstance(self.gen.normalvariate(), float) - self.assertIsInstance(self.gen.gauss(), float) - - -try: - random.SystemRandom().random() -except NotImplementedError: - SystemRandom_available = False -else: - SystemRandom_available = True - -@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available") -class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase): - gen = random.SystemRandom() - - def test_autoseed(self): - # Doesn't need to do anything except not fail - self.gen.seed() - - def test_saverestore(self): - self.assertRaises(NotImplementedError, self.gen.getstate) - self.assertRaises(NotImplementedError, self.gen.setstate, None) - - def test_seedargs(self): - # Doesn't need to do anything except not fail - self.gen.seed(100) - - def test_gauss(self): - self.gen.gauss_next = None - self.gen.seed(100) - self.assertEqual(self.gen.gauss_next, None) - - def test_pickling(self): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto) - - def test_53_bits_per_float(self): - # This should pass whenever a C double has 53 bit precision. - span = 2 ** 53 - cum = 0 - for i in range(100): - cum |= int(self.gen.random() * span) - self.assertEqual(cum, span-1) + self.assertRaises(TypeError, getrandbits) + self.assertRaises(TypeError, getrandbits, 1, 2) + self.assertRaises(ValueError, getrandbits, -1) + self.assertRaises(OverflowError, getrandbits, 1<<1000) + self.assertRaises(ValueError, getrandbits, -1<<1000) + self.assertRaises(TypeError, getrandbits, 10.1) def test_bigrand(self): # The randrange routine should build-up the required number of bits @@ -572,6 +492,10 @@ def test_randrange_step(self): randrange(1000, step=100) with self.assertRaises(TypeError): randrange(1000, None, step=100) + with self.assertRaises(TypeError): + randrange(1000, step=MyIndex(1)) + with self.assertRaises(TypeError): + randrange(1000, None, step=MyIndex(1)) def test_randbelow_logic(self, _log=log, int=int): # check bitcount transition points: 2**i and 2**(i+1)-1 @@ -594,6 +518,116 @@ def test_randbelow_logic(self, _log=log, int=int): self.assertEqual(k, numbits) # note the stronger assertion self.assertTrue(2**k > n > 2**(k-1)) # note the stronger assertion + def test_randrange_index(self): + randrange = self.gen.randrange + self.assertIn(randrange(MyIndex(5)), range(5)) + self.assertIn(randrange(MyIndex(2), MyIndex(7)), range(2, 7)) + self.assertIn(randrange(MyIndex(5), MyIndex(15), MyIndex(2)), range(5, 15, 2)) + + def test_randint(self): + randint = self.gen.randint + self.assertIn(randint(2, 5), (2, 3, 4, 5)) + self.assertEqual(randint(2, 2), 2) + self.assertIn(randint(MyIndex(2), MyIndex(5)), (2, 3, 4, 5)) + self.assertEqual(randint(MyIndex(2), MyIndex(2)), 2) + + self.assertRaises(ValueError, randint, 5, 2) + self.assertRaises(TypeError, randint) + self.assertRaises(TypeError, randint, 2) + self.assertRaises(TypeError, randint, 2, 5, 1) + self.assertRaises(TypeError, randint, 2.0, 5) + self.assertRaises(TypeError, randint, 2, 5.0) + + def test_pickling(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + state = pickle.dumps(self.gen, proto) + origseq = [self.gen.random() for i in range(10)] + newgen = pickle.loads(state) + restoredseq = [newgen.random() for i in range(10)] + self.assertEqual(origseq, restoredseq) + + def test_bug_1727780(self): + # verify that version-2-pickles can be loaded + # fine, whether they are created on 32-bit or 64-bit + # platforms, and that version-3-pickles load fine. + files = [("randv2_32.pck", 780), + ("randv2_64.pck", 866), + ("randv3.pck", 343)] + for file, value in files: + with open(support.findfile(file),"rb") as f: + r = pickle.load(f) + self.assertEqual(int(r.random()*1000), value) + + def test_bug_9025(self): + # Had problem with an uneven distribution in int(n*random()) + # Verify the fix by checking that distributions fall within expectations. + n = 100000 + randrange = self.gen.randrange + k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n)) + self.assertTrue(0.30 < k/n < .37, (k/n)) + + def test_randrange_bug_1590891(self): + start = 1000000000000 + stop = -100000000000000000000 + step = -200 + x = self.gen.randrange(start, stop, step) + self.assertTrue(stop < x <= start) + self.assertEqual((x+stop)%step, 0) + + def test_randbytes(self): + # Verify ranges + for n in range(1, 10): + data = self.gen.randbytes(n) + self.assertEqual(type(data), bytes) + self.assertEqual(len(data), n) + + self.assertEqual(self.gen.randbytes(0), b'') + + # Verify argument checking + self.assertRaises(TypeError, self.gen.randbytes) + self.assertRaises(TypeError, self.gen.randbytes, 1, 2) + self.assertRaises(ValueError, self.gen.randbytes, -1) + self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000) + self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000) + self.assertRaises(TypeError, self.gen.randbytes, 1.0) + + def test_mu_sigma_default_args(self): + self.assertIsInstance(self.gen.normalvariate(), float) + self.assertIsInstance(self.gen.gauss(), float) + + +try: + random.SystemRandom().random() +except NotImplementedError: + SystemRandom_available = False +else: + SystemRandom_available = True + +@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available") +class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase): + gen = random.SystemRandom() + + def test_autoseed(self): + # Doesn't need to do anything except not fail + self.gen.seed() + + def test_saverestore(self): + self.assertRaises(NotImplementedError, self.gen.getstate) + self.assertRaises(NotImplementedError, self.gen.setstate, None) + + def test_seedargs(self): + # Doesn't need to do anything except not fail + self.gen.seed(100) + + def test_gauss(self): + self.gen.gauss_next = None + self.gen.seed(100) + self.assertEqual(self.gen.gauss_next, None) + + def test_pickling(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto) + class TestRawMersenneTwister(unittest.TestCase): @test.support.cpython_only @@ -779,38 +813,6 @@ def test_long_seed(self): seed = (1 << (10000 * 8)) - 1 # about 10K bytes self.gen.seed(seed) - def test_53_bits_per_float(self): - # This should pass whenever a C double has 53 bit precision. - span = 2 ** 53 - cum = 0 - for i in range(100): - cum |= int(self.gen.random() * span) - self.assertEqual(cum, span-1) - - def test_bigrand(self): - # The randrange routine should build-up the required number of bits - # in stages so that all bit positions are active. - span = 2 ** 500 - cum = 0 - for i in range(100): - r = self.gen.randrange(span) - self.assertTrue(0 <= r < span) - cum |= r - self.assertEqual(cum, span-1) - - def test_bigrand_ranges(self): - for i in [40,80, 160, 200, 211, 250, 375, 512, 550]: - start = self.gen.randrange(2 ** (i-2)) - stop = self.gen.randrange(2 ** i) - if stop <= start: - continue - self.assertTrue(start <= self.gen.randrange(start, stop) < stop) - - def test_rangelimits(self): - for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]: - self.assertEqual(set(range(start,stop)), - set([self.gen.randrange(start,stop) for i in range(100)])) - def test_getrandbits(self): super().test_getrandbits() @@ -848,27 +850,6 @@ def test_randrange_uses_getrandbits(self): self.assertEqual(self.gen.randrange(2**99), 97904845777343510404718956115) - def test_randbelow_logic(self, _log=log, int=int): - # check bitcount transition points: 2**i and 2**(i+1)-1 - # show that: k = int(1.001 + _log(n, 2)) - # is equal to or one greater than the number of bits in n - for i in range(1, 1000): - n = 1 << i # check an exact power of two - numbits = i+1 - k = int(1.00001 + _log(n, 2)) - self.assertEqual(k, numbits) - self.assertEqual(n, 2**(k-1)) - - n += n - 1 # check 1 below the next power of two - k = int(1.00001 + _log(n, 2)) - self.assertIn(k, [numbits, numbits+1]) - self.assertTrue(2**k > n > 2**(k-2)) - - n -= n >> 15 # check a little farther below the next power of two - k = int(1.00001 + _log(n, 2)) - self.assertEqual(k, numbits) # note the stronger assertion - self.assertTrue(2**k > n > 2**(k-1)) # note the stronger assertion - def test_randbelow_without_getrandbits(self): # Random._randbelow() can only use random() when the built-in one # has been overridden but no new getrandbits() method was supplied. @@ -903,14 +884,6 @@ def test_randbelow_without_getrandbits(self): self.gen._randbelow_without_getrandbits(n, maxsize=maxsize) self.assertEqual(random_mock.call_count, 2) - def test_randrange_bug_1590891(self): - start = 1000000000000 - stop = -100000000000000000000 - step = -200 - x = self.gen.randrange(start, stop, step) - self.assertTrue(stop < x <= start) - self.assertEqual((x+stop)%step, 0) - def test_choices_algorithms(self): # The various ways of specifying weights should produce the same results choices = self.gen.choices From 4dea6b48cc24aede7250534923d3ce9f9c8b87e6 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 18 Jun 2025 22:11:35 +0530 Subject: [PATCH 606/989] gh-135639: fix `test_cycle` test (#135662) --- Lib/test/test_free_threading/test_itertools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_free_threading/test_itertools.py b/Lib/test/test_free_threading/test_itertools.py index 8360afbf78cadd..b8663ade1d4aca 100644 --- a/Lib/test/test_free_threading/test_itertools.py +++ b/Lib/test/test_free_threading/test_itertools.py @@ -44,7 +44,10 @@ def test_cycle(self): def work(it): barrier.wait() for _ in range(number_of_cycles): - _ = next(it) + try: + next(it) + except StopIteration: + pass data = (1, 2, 3, 4) for it in range(number_of_iterations): From 46c60e0d0b716e8e6f0b74a0f9d0542605b1efd4 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Thu, 19 Jun 2025 02:32:43 +0900 Subject: [PATCH 607/989] Docs: Fix markups for emphasis (GH-135598) The word emphasis character `_` is not supported as sphinx markup, so changed to `*`. --- Doc/c-api/refcounting.rst | 2 +- Doc/library/logging.config.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst index b23f016f9b0a06..57a0728d4e9af4 100644 --- a/Doc/c-api/refcounting.rst +++ b/Doc/c-api/refcounting.rst @@ -210,7 +210,7 @@ of Python objects. Py_SETREF(dst, src); - That arranges to set *dst* to *src* _before_ releasing the reference + That arranges to set *dst* to *src* *before* releasing the reference to the old value of *dst*, so that any code triggered as a side-effect of *dst* getting torn down no longer believes *dst* points to a valid object. diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index f8c71005a53028..96cca3073fec7e 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -586,7 +586,7 @@ configuration dictionary for the handler named ``foo``, and later (once that handler has been configured) it points to the configured handler instance. Thus, ``cfg://handlers.foo`` could resolve to either a dictionary or a handler instance. In general, it is wise to name handlers in a way such that dependent -handlers are configured _after_ any handlers they depend on; that allows +handlers are configured *after* any handlers they depend on; that allows something like ``cfg://handlers.foo`` to be used in configuring a handler that depends on handler ``foo``. If that dependent handler were named ``bar``, problems would result, because the configuration of ``bar`` would be attempted From 17ac3933c3c860e08f7963cf270116a39a063be7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 18 Jun 2025 14:24:05 -0400 Subject: [PATCH 608/989] gh-135641: Fix flaky `test_capi.test_lock_two_threads` test case (gh-135642) The mutex may have the `_Py_HAS_PARKED` bit set. --- Modules/_testinternalcapi/test_lock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 8d678412fe7179..8d8cb992b0e07f 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -57,7 +57,10 @@ lock_thread(void *arg) _Py_atomic_store_int(&test_data->started, 1); PyMutex_Lock(m); - assert(m->_bits == 1); + // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set + // (m->_bits == 3) due to bucket collisions in the parking lot hash table + // between this mutex and the `test_data.done` event. + assert(m->_bits == 1 || m->_bits == 3); PyMutex_Unlock(m); assert(m->_bits == 0); From e9b647dd30d22cef465972d898a34c4b1bb6615d Mon Sep 17 00:00:00 2001 From: alexey semenyuk Date: Thu, 19 Jun 2025 02:10:20 +0500 Subject: [PATCH 609/989] gh-134538: Add link to shutil.rmtree example in function docs (GH-135540) --- Doc/library/shutil.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index c78dfe1aafa0ed..e7c4c4f46bd011 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -327,6 +327,10 @@ Directory and files operations The deprecated *onerror* is similar to *onexc*, except that the third parameter it receives is the tuple returned from :func:`sys.exc_info`. + .. seealso:: + :ref:`shutil-rmtree-example` for an example of handling the removal + of a directory tree that contains read-only files. + .. audit-event:: shutil.rmtree path,dir_fd shutil.rmtree .. versionchanged:: 3.3 From 15f2bac02c5e106f04a93ce73fd93cc305253405 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 18 Jun 2025 19:31:23 -0400 Subject: [PATCH 610/989] gh-135450: Remove assertion in `_PyCode_CheckNoExternalState` (gh-135466) The assertion reflected a misunderstanding of situations where "hidden" variables might exist, namely generator expressions and comprehensions. --- Lib/test/test_interpreters/test_api.py | 16 ++++++++++++++++ Objects/codeobject.c | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 612e8240603f03..0ee4582b5d1568 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -944,6 +944,22 @@ def test_created_with_capi(self): with self.assertRaisesRegex(InterpreterError, 'unrecognized'): interp.exec('raise Exception("it worked!")') + def test_list_comprehension(self): + # gh-135450: List comprehensions caused an assertion failure + # in _PyCode_CheckNoExternalState() + import string + r_interp, w_interp = self.pipe() + + interp = interpreters.create() + interp.exec(f"""if True: + import os + comp = [str(i) for i in range(10)] + os.write({w_interp}, ''.join(comp).encode()) + """) + self.assertEqual(os.read(r_interp, 10).decode(), string.digits) + interp.close() + + # test__interpreters covers the remaining # Interpreter.exec() behavior. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 34b50ef97d544b..3f53d4cfeb20ac 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1999,7 +1999,6 @@ _PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, const char **p_errmsg) { const char *errmsg = NULL; - assert(counts->locals.hidden.total == 0); if (counts->numfree > 0) { // It's a closure. errmsg = "closures not supported"; } From 725da50520c5cc3002f6f55bf67c56853507c604 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 18 Jun 2025 17:57:14 -0600 Subject: [PATCH 611/989] gh-133485: Use interpreters.Interpreter in InterpreterPoolExecutor (gh-133957) Most importantly, this resolves the issues with functions and types defined in __main__. It also expands the number of supported objects and simplifies the implementation. --- Doc/library/concurrent.futures.rst | 19 +- Lib/concurrent/futures/interpreter.py | 181 +++--------- Lib/test/test_concurrent_futures/test_init.py | 4 + .../test_interpreter_pool.py | 263 ++++++++++++++---- Python/crossinterp.c | 4 +- 5 files changed, 266 insertions(+), 205 deletions(-) diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 3c8d9ab111e09e..dd92765038c4f7 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -265,7 +265,7 @@ Each worker's interpreter is isolated from all the other interpreters. "Isolated" means each interpreter has its own runtime state and operates completely independently. For example, if you redirect :data:`sys.stdout` in one interpreter, it will not be automatically -redirected any other interpreter. If you import a module in one +redirected to any other interpreter. If you import a module in one interpreter, it is not automatically imported in any other. You would need to import the module separately in interpreter where you need it. In fact, each module imported in an interpreter is @@ -287,7 +287,7 @@ efficient alternative is to serialize with :mod:`pickle` and then send the bytes over a shared :mod:`socket ` or :func:`pipe `. -.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None) +.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=()) A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously using a pool of at most *max_workers* threads. Each thread runs @@ -304,21 +304,10 @@ the bytes over a shared :mod:`socket ` or and *initargs* using :mod:`pickle` when sending them to the worker's interpreter. - .. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - .. note:: The executor may replace uncaught exceptions from *initializer* with :class:`~concurrent.futures.interpreter.ExecutionFailed`. - The optional *shared* argument is a :class:`dict` of objects that all - interpreters in the pool share. The *shared* items are added to each - interpreter's ``__main__`` module. Not all objects are shareable. - Shareable objects include the builtin singletons, :class:`str` - and :class:`bytes`, and :class:`memoryview`. See :pep:`734` - for more info. - Other caveats from parent :class:`ThreadPoolExecutor` apply here. :meth:`~Executor.submit` and :meth:`~Executor.map` work like normal, @@ -326,10 +315,6 @@ except the worker serializes the callable and arguments using :mod:`pickle` when sending them to its interpreter. The worker likewise serializes the return value when sending it back. -.. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - When a worker's current task raises an uncaught exception, the worker always tries to preserve the exception as-is. If that is successful then it also sets the ``__cause__`` to a corresponding diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index f12b4ac33cda27..cbb60ce80c1813 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -1,56 +1,39 @@ """Implements InterpreterPoolExecutor.""" -import contextlib -import pickle +from concurrent import interpreters +import sys import textwrap from . import thread as _thread -import _interpreters -import _interpqueues +import traceback -class ExecutionFailed(_interpreters.InterpreterError): - """An unhandled exception happened during execution.""" - - def __init__(self, excinfo): - msg = excinfo.formatted - if not msg: - if excinfo.type and excinfo.msg: - msg = f'{excinfo.type.__name__}: {excinfo.msg}' - else: - msg = excinfo.type.__name__ or excinfo.msg - super().__init__(msg) - self.excinfo = excinfo - - def __str__(self): +def do_call(results, func, args, kwargs): + try: + return func(*args, **kwargs) + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. try: - formatted = self.excinfo.errdisplay - except Exception: - return super().__str__() - else: - return textwrap.dedent(f""" -{super().__str__()} - -Uncaught in the interpreter: - -{formatted} - """.strip()) + results.put(exc) + except interpreters.NotShareableError: + # The exception is not shareable. + print('exception is not shareable:', file=sys.stderr) + traceback.print_exception(exc) + results.put(None) + raise # re-raise class WorkerContext(_thread.WorkerContext): @classmethod - def prepare(cls, initializer, initargs, shared): + def prepare(cls, initializer, initargs): def resolve_task(fn, args, kwargs): if isinstance(fn, str): # XXX Circle back to this later. raise TypeError('scripts not supported') else: - # Functions defined in the __main__ module can't be pickled, - # so they can't be used here. In the future, we could possibly - # borrow from multiprocessing to work around this. task = (fn, args, kwargs) - data = pickle.dumps(task) - return data + return task if initializer is not None: try: @@ -62,68 +45,24 @@ def resolve_task(fn, args, kwargs): else: initdata = None def create_context(): - return cls(initdata, shared) + return cls(initdata) return create_context, resolve_task - @classmethod - @contextlib.contextmanager - def _capture_exc(cls, resultsid): - try: - yield - except BaseException as exc: - # Send the captured exception out on the results queue, - # but still leave it unhandled for the interpreter to handle. - _interpqueues.put(resultsid, (None, exc)) - raise # re-raise - - @classmethod - def _send_script_result(cls, resultsid): - _interpqueues.put(resultsid, (None, None)) - - @classmethod - def _call(cls, func, args, kwargs, resultsid): - with cls._capture_exc(resultsid): - res = func(*args or (), **kwargs or {}) - # Send the result back. - with cls._capture_exc(resultsid): - _interpqueues.put(resultsid, (res, None)) - - @classmethod - def _call_pickled(cls, pickled, resultsid): - with cls._capture_exc(resultsid): - fn, args, kwargs = pickle.loads(pickled) - cls._call(fn, args, kwargs, resultsid) - - def __init__(self, initdata, shared=None): + def __init__(self, initdata): self.initdata = initdata - self.shared = dict(shared) if shared else None - self.interpid = None - self.resultsid = None + self.interp = None + self.results = None def __del__(self): - if self.interpid is not None: + if self.interp is not None: self.finalize() - def _exec(self, script): - assert self.interpid is not None - excinfo = _interpreters.exec(self.interpid, script, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) - def initialize(self): - assert self.interpid is None, self.interpid - self.interpid = _interpreters.create(reqrefs=True) + assert self.interp is None, self.interp + self.interp = interpreters.create() try: - _interpreters.incref(self.interpid) - maxsize = 0 - self.resultsid = _interpqueues.create(maxsize) - - self._exec(f'from {__name__} import WorkerContext') - - if self.shared: - _interpreters.set___main___attrs( - self.interpid, self.shared, restrict=True) + self.results = interpreters.create_queue(maxsize) if self.initdata: self.run(self.initdata) @@ -132,53 +71,25 @@ def initialize(self): raise # re-raise def finalize(self): - interpid = self.interpid - resultsid = self.resultsid - self.resultsid = None - self.interpid = None - if resultsid is not None: - try: - _interpqueues.destroy(resultsid) - except _interpqueues.QueueNotFoundError: - pass - if interpid is not None: - try: - _interpreters.decref(interpid) - except _interpreters.InterpreterNotFoundError: - pass + interp = self.interp + results = self.results + self.results = None + self.interp = None + if results is not None: + del results + if interp is not None: + interp.close() def run(self, task): - data = task - script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' - try: - self._exec(script) - except ExecutionFailed as exc: - exc_wrapper = exc - else: - exc_wrapper = None - - # Return the result, or raise the exception. - while True: - try: - obj = _interpqueues.get(self.resultsid) - except _interpqueues.QueueNotFoundError: + return self.interp.call(do_call, self.results, *task) + except interpreters.ExecutionFailed as wrapper: + # Wait for the exception data to show up. + exc = self.results.get() + if exc is None: + # The exception must have been not shareable. raise # re-raise - except _interpqueues.QueueError: - continue - except ModuleNotFoundError: - # interpreters._queues doesn't exist, which means - # QueueEmpty doesn't. Act as though it does. - continue - else: - break - (res, exc), unboundop = obj - assert unboundop is None, unboundop - if exc is not None: - assert res is None, res - assert exc_wrapper is not None - raise exc from exc_wrapper - return res + raise exc from wrapper class BrokenInterpreterPool(_thread.BrokenThreadPool): @@ -192,11 +103,11 @@ class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): BROKEN = BrokenInterpreterPool @classmethod - def prepare_context(cls, initializer, initargs, shared): - return WorkerContext.prepare(initializer, initargs, shared) + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=(), shared=None): + initializer=None, initargs=()): """Initializes a new InterpreterPoolExecutor instance. Args: @@ -206,8 +117,6 @@ def __init__(self, max_workers=None, thread_name_prefix='', initializer: A callable or script used to initialize each worker interpreter. initargs: A tuple of arguments to pass to the initializer. - shared: A mapping of shareabled objects to be inserted into - each worker interpreter. """ super().__init__(max_workers, thread_name_prefix, - initializer, initargs, shared=shared) + initializer, initargs) diff --git a/Lib/test/test_concurrent_futures/test_init.py b/Lib/test/test_concurrent_futures/test_init.py index df640929309318..6b8484c0d5f197 100644 --- a/Lib/test/test_concurrent_futures/test_init.py +++ b/Lib/test/test_concurrent_futures/test_init.py @@ -20,6 +20,10 @@ def init(x): global INITIALIZER_STATUS INITIALIZER_STATUS = x + # InterpreterPoolInitializerTest.test_initializer fails + # if we don't have a LOAD_GLOBAL. (It could be any global.) + # We will address this separately. + INITIALIZER_STATUS def get_init_status(): return INITIALIZER_STATUS diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index 5fd5684e1035e9..844dfdd6fc901c 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -2,35 +2,78 @@ import contextlib import io import os -import pickle +import sys import time import unittest -from concurrent.futures.interpreter import ( - ExecutionFailed, BrokenInterpreterPool, -) +from concurrent.futures.interpreter import BrokenInterpreterPool +from concurrent import interpreters from concurrent.interpreters import _queues as queues import _interpreters from test import support +from test.support import os_helper +from test.support import script_helper import test.test_asyncio.utils as testasyncio_utils from .executor import ExecutorTest, mul from .util import BaseTestCase, InterpreterPoolMixin, setup_module +WINDOWS = sys.platform.startswith('win') + + +@contextlib.contextmanager +def nonblocking(fd): + blocking = os.get_blocking(fd) + if blocking: + os.set_blocking(fd, False) + try: + yield + finally: + if blocking: + os.set_blocking(fd, blocking) + + +def read_file_with_timeout(fd, nbytes, timeout): + with nonblocking(fd): + end = time.time() + timeout + try: + return os.read(fd, nbytes) + except BlockingIOError: + pass + while time.time() < end: + try: + return os.read(fd, nbytes) + except BlockingIOError: + continue + else: + raise TimeoutError('nothing to read') + + +if not WINDOWS: + import select + def read_file_with_timeout(fd, nbytes, timeout): + r, _, _ = select.select([fd], [], [], timeout) + if fd not in r: + raise TimeoutError('nothing to read') + return os.read(fd, nbytes) + + def noop(): pass def write_msg(fd, msg): + import os os.write(fd, msg + b'\0') -def read_msg(fd): +def read_msg(fd, timeout=10.0): msg = b'' - while ch := os.read(fd, 1): - if ch == b'\0': - return msg + ch = read_file_with_timeout(fd, 1, timeout) + while ch != b'\0': msg += ch + ch = os.read(fd, 1) + return msg def get_current_name(): @@ -113,6 +156,38 @@ def test_init_func(self): self.assertEqual(before, b'\0') self.assertEqual(after, msg) + def test_init_with___main___global(self): + # See https://github.com/python/cpython/pull/133957#issuecomment-2927415311. + text = """if True: + from concurrent.futures import InterpreterPoolExecutor + + INITIALIZER_STATUS = 'uninitialized' + + def init(x): + global INITIALIZER_STATUS + INITIALIZER_STATUS = x + INITIALIZER_STATUS + + def get_init_status(): + return INITIALIZER_STATUS + + if __name__ == "__main__": + exe = InterpreterPoolExecutor(initializer=init, + initargs=('initialized',)) + fut = exe.submit(get_init_status) + print(fut.result()) # 'initialized' + exe.shutdown(wait=True) + print(INITIALIZER_STATUS) # 'uninitialized' + """ + with os_helper.temp_dir() as tempdir: + filename = script_helper.make_script(tempdir, 'my-script', text) + res = script_helper.assert_python_ok(filename) + stdout = res.out.decode('utf-8').strip() + self.assertEqual(stdout.splitlines(), [ + 'initialized', + 'uninitialized', + ]) + def test_init_closure(self): count = 0 def init1(): @@ -121,10 +196,19 @@ def init2(): nonlocal count count += 1 - with self.assertRaises(pickle.PicklingError): - self.executor_type(initializer=init1) - with self.assertRaises(pickle.PicklingError): - self.executor_type(initializer=init2) + with contextlib.redirect_stderr(io.StringIO()) as stderr: + with self.executor_type(initializer=init1) as executor: + fut = executor.submit(lambda: None) + self.assertIn('NotShareableError', stderr.getvalue()) + with self.assertRaises(BrokenInterpreterPool): + fut.result() + + with contextlib.redirect_stderr(io.StringIO()) as stderr: + with self.executor_type(initializer=init2) as executor: + fut = executor.submit(lambda: None) + self.assertIn('NotShareableError', stderr.getvalue()) + with self.assertRaises(BrokenInterpreterPool): + fut.result() def test_init_instance_method(self): class Spam: @@ -132,26 +216,12 @@ def initializer(self): raise NotImplementedError spam = Spam() - with self.assertRaises(pickle.PicklingError): - self.executor_type(initializer=spam.initializer) - - def test_init_shared(self): - msg = b'eggs' - r, w = self.pipe() - script = f"""if True: - import os - if __name__ != '__main__': - import __main__ - spam = __main__.spam - os.write({w}, spam + b'\\0') - """ - - executor = self.executor_type(shared={'spam': msg}) - fut = executor.submit(exec, script) - fut.result() - after = read_msg(r) - - self.assertEqual(after, msg) + with contextlib.redirect_stderr(io.StringIO()) as stderr: + with self.executor_type(initializer=spam.initializer) as executor: + fut = executor.submit(lambda: None) + self.assertIn('NotShareableError', stderr.getvalue()) + with self.assertRaises(BrokenInterpreterPool): + fut.result() @unittest.expectedFailure def test_init_exception_in_script(self): @@ -178,8 +248,6 @@ def test_init_exception_in_func(self): stderr = stderr.getvalue() self.assertIn('ExecutionFailed: Exception: spam', stderr) self.assertIn('Uncaught in the interpreter:', stderr) - self.assertIn('The above exception was the direct cause of the following exception:', - stderr) @unittest.expectedFailure def test_submit_script(self): @@ -208,10 +276,14 @@ def task2(): return spam executor = self.executor_type() - with self.assertRaises(pickle.PicklingError): - executor.submit(task1) - with self.assertRaises(pickle.PicklingError): - executor.submit(task2) + + fut = executor.submit(task1) + with self.assertRaises(_interpreters.NotShareableError): + fut.result() + + fut = executor.submit(task2) + with self.assertRaises(_interpreters.NotShareableError): + fut.result() def test_submit_local_instance(self): class Spam: @@ -219,8 +291,9 @@ def __init__(self): self.value = True executor = self.executor_type() - with self.assertRaises(pickle.PicklingError): - executor.submit(Spam) + fut = executor.submit(Spam) + with self.assertRaises(_interpreters.NotShareableError): + fut.result() def test_submit_instance_method(self): class Spam: @@ -229,8 +302,9 @@ def run(self): spam = Spam() executor = self.executor_type() - with self.assertRaises(pickle.PicklingError): - executor.submit(spam.run) + fut = executor.submit(spam.run) + with self.assertRaises(_interpreters.NotShareableError): + fut.result() def test_submit_func_globals(self): executor = self.executor_type() @@ -242,13 +316,14 @@ def test_submit_func_globals(self): @unittest.expectedFailure def test_submit_exception_in_script(self): + # Scripts are not supported currently. fut = self.executor.submit('raise Exception("spam")') with self.assertRaises(Exception) as captured: fut.result() self.assertIs(type(captured.exception), Exception) self.assertEqual(str(captured.exception), 'spam') cause = captured.exception.__cause__ - self.assertIs(type(cause), ExecutionFailed) + self.assertIs(type(cause), interpreters.ExecutionFailed) for attr in ('__name__', '__qualname__', '__module__'): self.assertEqual(getattr(cause.excinfo.type, attr), getattr(Exception, attr)) @@ -261,7 +336,7 @@ def test_submit_exception_in_func(self): self.assertIs(type(captured.exception), Exception) self.assertEqual(str(captured.exception), 'spam') cause = captured.exception.__cause__ - self.assertIs(type(cause), ExecutionFailed) + self.assertIs(type(cause), interpreters.ExecutionFailed) for attr in ('__name__', '__qualname__', '__module__'): self.assertEqual(getattr(cause.excinfo.type, attr), getattr(Exception, attr)) @@ -269,16 +344,93 @@ def test_submit_exception_in_func(self): def test_saturation(self): blocker = queues.create() - executor = self.executor_type(4, shared=dict(blocker=blocker)) + executor = self.executor_type(4) for i in range(15 * executor._max_workers): - executor.submit(exec, 'import __main__; __main__.blocker.get()') - #executor.submit('blocker.get()') + executor.submit(blocker.get) self.assertEqual(len(executor._threads), executor._max_workers) for i in range(15 * executor._max_workers): blocker.put_nowait(None) executor.shutdown(wait=True) + def test_blocking(self): + # There is no guarantee that a worker will be created for every + # submitted task. That's because there's a race between: + # + # * a new worker thread, created when task A was just submitted, + # becoming non-idle when it picks up task A + # * after task B is added to the queue, a new worker thread + # is started only if there are no idle workers + # (the check in ThreadPoolExecutor._adjust_thread_count()) + # + # That means we must not block waiting for *all* tasks to report + # "ready" before we unblock the known-ready workers. + ready = queues.create() + blocker = queues.create() + + def run(taskid, ready, blocker): + # There can't be any globals here. + ready.put_nowait(taskid) + blocker.get() # blocking + + numtasks = 10 + futures = [] + with self.executor_type() as executor: + # Request the jobs. + for i in range(numtasks): + fut = executor.submit(run, i, ready, blocker) + futures.append(fut) + pending = numtasks + while pending > 0: + # Wait for any to be ready. + done = 0 + for _ in range(pending): + try: + ready.get(timeout=1) # blocking + except interpreters.QueueEmpty: + pass + else: + done += 1 + pending -= done + # Unblock the workers. + for _ in range(done): + blocker.put_nowait(None) + + def test_blocking_with_limited_workers(self): + # This is essentially the same as test_blocking, + # but we explicitly force a limited number of workers, + # instead of it happening implicitly sometimes due to a race. + ready = queues.create() + blocker = queues.create() + + def run(taskid, ready, blocker): + # There can't be any globals here. + ready.put_nowait(taskid) + blocker.get() # blocking + + numtasks = 10 + futures = [] + with self.executor_type(4) as executor: + # Request the jobs. + for i in range(numtasks): + fut = executor.submit(run, i, ready, blocker) + futures.append(fut) + pending = numtasks + while pending > 0: + # Wait for any to be ready. + done = 0 + for _ in range(pending): + try: + ready.get(timeout=1) # blocking + except interpreters.QueueEmpty: + pass + else: + done += 1 + pending -= done + # Unblock the workers. + for _ in range(done): + blocker.put_nowait(None) + @support.requires_gil_enabled("gh-117344: test is flaky without the GIL") def test_idle_thread_reuse(self): executor = self.executor_type() @@ -289,12 +441,21 @@ def test_idle_thread_reuse(self): executor.shutdown(wait=True) def test_pickle_errors_propagate(self): - # GH-125864: Pickle errors happen before the script tries to execute, so the - # queue used to wait infinitely. - + # GH-125864: Pickle errors happen before the script tries to execute, + # so the queue used to wait infinitely. fut = self.executor.submit(PickleShenanigans(0)) - with self.assertRaisesRegex(RuntimeError, "gotcha"): + expected = interpreters.NotShareableError + with self.assertRaisesRegex(expected, 'args not shareable') as cm: fut.result() + self.assertRegex(str(cm.exception.__cause__), 'unpickled') + + def test_no_stale_references(self): + # Weak references don't cross between interpreters. + raise unittest.SkipTest('not applicable') + + def test_free_reference(self): + # Weak references don't cross between interpreters. + raise unittest.SkipTest('not applicable') class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): diff --git a/Python/crossinterp.c b/Python/crossinterp.c index aa958fe2f5cbd7..c44bcd559000b3 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -674,7 +674,9 @@ _PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) finally: if (exc != NULL) { - sync_module_capture_exc(tstate, &ctx->main); + if (_PyErr_Occurred(tstate)) { + sync_module_capture_exc(tstate, &ctx->main); + } // We restore the original exception. // It might make sense to chain it (__context__). _PyErr_SetRaisedException(tstate, exc); From 140731ff671395fb7a869c2784429c14dc83fb27 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 19 Jun 2025 09:00:41 +0200 Subject: [PATCH 612/989] Document that PyType_GetModuleByDef returns a borrowed reference (GH-135666) --- Doc/c-api/type.rst | 4 ++++ Doc/data/refcounts.dat | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index 2176b8e492f306..5bdbff4e0ad990 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -282,6 +282,10 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + The returned reference is :term:`borrowed ` from *type*, + and will be valid as long as you hold a reference to *type*. + Do not release it with :c:func:`Py_DECREF` or similar. + .. versionadded:: 3.11 .. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index f5f02f0a79c93d..99cc823c0c3772 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -2385,6 +2385,10 @@ PyType_GetFlags:PyTypeObject*:type:0: PyType_GetName:PyObject*::+1: PyType_GetName:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyObject*::0: +PyType_GetModuleByDef:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyModuleDef*:def:: + PyType_GetQualName:PyObject*::+1: PyType_GetQualName:PyTypeObject*:type:0: From 7a20c72cb612653fa46c41c3a969aefa2b52738b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 19 Jun 2025 11:36:56 +0200 Subject: [PATCH 613/989] gh-111758: Run UBSan in GitHub Actions (GH-135578) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/build.yml | 8 +++ .github/workflows/reusable-ubsan.yml | 74 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 .github/workflows/reusable-ubsan.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 54ebc914b46821..c6171571857af6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -527,6 +527,14 @@ jobs: config_hash: ${{ needs.build-context.outputs.config-hash }} free-threading: ${{ matrix.free-threading }} + build-ubsan: + name: Undefined behavior sanitizer + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + uses: ./.github/workflows/reusable-ubsan.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + cross-build-linux: name: Cross build Linux runs-on: ubuntu-latest diff --git a/.github/workflows/reusable-ubsan.yml b/.github/workflows/reusable-ubsan.yml new file mode 100644 index 00000000000000..cf93932f13b827 --- /dev/null +++ b/.github/workflows/reusable-ubsan.yml @@ -0,0 +1,74 @@ +name: Reusable Undefined Behavior Sanitizer + +on: + workflow_call: + inputs: + config_hash: + required: true + type: string + +env: + FORCE_COLOR: 1 + +jobs: + build-ubsan-reusable: + name: 'Undefined behavior sanitizer' + runs-on: ubuntu-24.04 + timeout-minutes: 60 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} + - name: Install dependencies + run: | + sudo ./.github/workflows/posix-deps-apt.sh + # Install clang-20 + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 20 + - name: UBSAN option setup + run: | + echo "UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1" >> "$GITHUB_ENV" + echo "CC=clang" >> "$GITHUB_ENV" + echo "CXX=clang++" >> "$GITHUB_ENV" + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: ${{ github.event_name == 'push' }} + max-size: "200M" + - name: Configure CPython + run: >- + ./configure + --config-cache + --with-undefined-behavior-sanitizer + --with-pydebug + - name: Set up UBSAN log after configuration + run: | + echo "UBSAN_OPTIONS=${UBSAN_OPTIONS}:log_path=${GITHUB_WORKSPACE}/ubsan_log" >> "$GITHUB_ENV" + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: Tests + run: ./python -m test -j4 + - name: Display UBSAN logs + if: always() + run: find "${GITHUB_WORKSPACE}" -name 'ubsan_log.*' | xargs head -n 1000 + - name: Archive UBSAN logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: >- + ubsan-logs + path: ubsan_log.* + if-no-files-found: ignore From 5c25c884b93eb79f640c47d6dba20f11fdf0ade4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 19 Jun 2025 11:40:40 +0200 Subject: [PATCH 614/989] gh-126112: Fix test_os.TimerfdTests: use 10 ms resolution (#135681) Use 10 ms for CLOCK_RES instead of 100 ms to tolerate slow buildbots. --- Lib/test/test_os.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 88b5b0e6e358bb..8f56b4559f46c6 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -13,7 +13,6 @@ import locale import os import pickle -import platform import select import selectors import shutil @@ -4291,13 +4290,8 @@ def test_eventfd_select(self): @unittest.skipIf(sys.platform == "android", "gh-124873: Test is flaky on Android") @support.requires_linux_version(2, 6, 30) class TimerfdTests(unittest.TestCase): - # 1 ms accuracy is reliably achievable on every platform except Android - # emulators, where we allow 10 ms (gh-108277). - if sys.platform == "android" and platform.android_ver().is_emulator: - CLOCK_RES_PLACES = 2 - else: - CLOCK_RES_PLACES = 3 - + # gh-126112: Use 10 ms to tolerate slow buildbots + CLOCK_RES_PLACES = 2 # 10 ms CLOCK_RES = 10 ** -CLOCK_RES_PLACES CLOCK_RES_NS = 10 ** (9 - CLOCK_RES_PLACES) From 9731dd2c8df3509095ea45493bcefabe732eaf60 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 19 Jun 2025 11:10:29 +0100 Subject: [PATCH 615/989] GH-135379: Specialize int operations for compact ints only (GH-135668) --- Include/cpython/longintrepr.h | 6 + Include/internal/pycore_long.h | 7 +- Include/internal/pycore_opcode_metadata.h | 8 +- Include/internal/pycore_optimizer.h | 10 + Include/internal/pycore_uop_ids.h | 280 +++++++++--------- Include/internal/pycore_uop_metadata.h | 16 +- Lib/test/test_capi/test_opt.py | 6 +- ...-06-18-12-19-13.gh-issue-135379.TCvGpj.rst | 3 + Objects/longobject.c | 45 ++- Python/bytecodes.c | 43 ++- Python/executor_cases.c.h | 77 ++--- Python/generated_cases.c.h | 69 ++--- Python/optimizer_analysis.c | 3 + Python/optimizer_bytecodes.c | 36 ++- Python/optimizer_cases.c.h | 41 ++- Python/optimizer_symbols.c | 147 ++++++++- Tools/cases_generator/analyzer.py | 1 + 17 files changed, 515 insertions(+), 283 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index 4b6f97a5e475d6..9f12298d4fcc37 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -124,6 +124,12 @@ _PyLong_IsCompact(const PyLongObject* op) { return op->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS); } +static inline int +PyLong_CheckCompact(const PyObject *op) +{ + return PyLong_CheckExact(op) && _PyLong_IsCompact((PyLongObject *)op); +} + #define PyUnstable_Long_IsCompact _PyLong_IsCompact static inline Py_ssize_t diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index b8efba74bdc421..852d22bb8c5749 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -112,9 +112,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); -PyAPI_FUNC(PyObject*) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right); -PyAPI_FUNC(PyObject*) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right); -PyAPI_FUNC(PyObject*) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right); // Export for 'binascii' shared extension. PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; @@ -213,7 +213,6 @@ _PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) { assert(PyLong_Check(b)); return (a->long_value.lv_tag | b->long_value.lv_tag) < (2 << NON_SIZE_BITS); } - static inline bool _PyLong_IsZero(const PyLongObject *op) { diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 59c85ddeb3d663..dd1bf2d1d2b51a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1072,12 +1072,12 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[267]; const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, @@ -1085,7 +1085,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1129,7 +1129,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CLEANUP_THROW] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, - [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, [CONTAINS_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index b27341cb1dbbeb..576c27947824b4 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -179,6 +179,7 @@ typedef enum _JitSymType { JIT_SYM_KNOWN_VALUE_TAG = 7, JIT_SYM_TUPLE_TAG = 8, JIT_SYM_TRUTHINESS_TAG = 9, + JIT_SYM_COMPACT_INT = 10, } JitSymType; typedef struct _jit_opt_known_class { @@ -211,6 +212,10 @@ typedef struct { uint16_t value; } JitOptTruthiness; +typedef struct { + uint8_t tag; +} JitOptCompactInt; + typedef union _jit_opt_symbol { uint8_t tag; JitOptKnownClass cls; @@ -218,6 +223,7 @@ typedef union _jit_opt_symbol { JitOptKnownVersion version; JitOptTuple tuple; JitOptTruthiness truthiness; + JitOptCompactInt compact; } JitOptSymbol; @@ -308,6 +314,7 @@ extern JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx); extern JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx); extern JitOptRef _Py_uop_sym_new_type( JitOptContext *ctx, PyTypeObject *typ); + extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); extern bool _Py_uop_sym_has_type(JitOptRef sym); @@ -325,6 +332,9 @@ extern JitOptRef _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef * extern JitOptRef _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef sym, int item); extern int _Py_uop_sym_tuple_length(JitOptRef sym); extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value, bool truthy); +extern bool _Py_uop_sym_is_compact_int(JitOptRef sym); +extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx); +extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef sym); extern void _Py_uop_abstractcontext_init(JitOptContext *ctx); extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index f67626ec5c6ac5..aa11ddb75e19fb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -146,32 +146,34 @@ extern "C" { #define _GUARD_NOS_LIST 401 #define _GUARD_NOS_NOT_NULL 402 #define _GUARD_NOS_NULL 403 -#define _GUARD_NOS_TUPLE 404 -#define _GUARD_NOS_UNICODE 405 -#define _GUARD_NOT_EXHAUSTED_LIST 406 -#define _GUARD_NOT_EXHAUSTED_RANGE 407 -#define _GUARD_NOT_EXHAUSTED_TUPLE 408 -#define _GUARD_THIRD_NULL 409 -#define _GUARD_TOS_ANY_SET 410 -#define _GUARD_TOS_DICT 411 -#define _GUARD_TOS_FLOAT 412 -#define _GUARD_TOS_INT 413 -#define _GUARD_TOS_LIST 414 -#define _GUARD_TOS_SLICE 415 -#define _GUARD_TOS_TUPLE 416 -#define _GUARD_TOS_UNICODE 417 -#define _GUARD_TYPE_VERSION 418 -#define _GUARD_TYPE_VERSION_AND_LOCK 419 +#define _GUARD_NOS_OVERFLOWED 404 +#define _GUARD_NOS_TUPLE 405 +#define _GUARD_NOS_UNICODE 406 +#define _GUARD_NOT_EXHAUSTED_LIST 407 +#define _GUARD_NOT_EXHAUSTED_RANGE 408 +#define _GUARD_NOT_EXHAUSTED_TUPLE 409 +#define _GUARD_THIRD_NULL 410 +#define _GUARD_TOS_ANY_SET 411 +#define _GUARD_TOS_DICT 412 +#define _GUARD_TOS_FLOAT 413 +#define _GUARD_TOS_INT 414 +#define _GUARD_TOS_LIST 415 +#define _GUARD_TOS_OVERFLOWED 416 +#define _GUARD_TOS_SLICE 417 +#define _GUARD_TOS_TUPLE 418 +#define _GUARD_TOS_UNICODE 419 +#define _GUARD_TYPE_VERSION 420 +#define _GUARD_TYPE_VERSION_AND_LOCK 421 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 420 -#define _INIT_CALL_PY_EXACT_ARGS 421 -#define _INIT_CALL_PY_EXACT_ARGS_0 422 -#define _INIT_CALL_PY_EXACT_ARGS_1 423 -#define _INIT_CALL_PY_EXACT_ARGS_2 424 -#define _INIT_CALL_PY_EXACT_ARGS_3 425 -#define _INIT_CALL_PY_EXACT_ARGS_4 426 -#define _INSERT_NULL 427 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 422 +#define _INIT_CALL_PY_EXACT_ARGS 423 +#define _INIT_CALL_PY_EXACT_ARGS_0 424 +#define _INIT_CALL_PY_EXACT_ARGS_1 425 +#define _INIT_CALL_PY_EXACT_ARGS_2 426 +#define _INIT_CALL_PY_EXACT_ARGS_3 427 +#define _INIT_CALL_PY_EXACT_ARGS_4 428 +#define _INSERT_NULL 429 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -181,173 +183,173 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 428 +#define _IS_NONE 430 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 429 -#define _ITER_CHECK_RANGE 430 -#define _ITER_CHECK_TUPLE 431 -#define _ITER_JUMP_LIST 432 -#define _ITER_JUMP_RANGE 433 -#define _ITER_JUMP_TUPLE 434 -#define _ITER_NEXT_LIST 435 -#define _ITER_NEXT_LIST_TIER_TWO 436 -#define _ITER_NEXT_RANGE 437 -#define _ITER_NEXT_TUPLE 438 -#define _JUMP_TO_TOP 439 +#define _ITER_CHECK_LIST 431 +#define _ITER_CHECK_RANGE 432 +#define _ITER_CHECK_TUPLE 433 +#define _ITER_JUMP_LIST 434 +#define _ITER_JUMP_RANGE 435 +#define _ITER_JUMP_TUPLE 436 +#define _ITER_NEXT_LIST 437 +#define _ITER_NEXT_LIST_TIER_TWO 438 +#define _ITER_NEXT_RANGE 439 +#define _ITER_NEXT_TUPLE 440 +#define _JUMP_TO_TOP 441 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 440 -#define _LOAD_ATTR_CLASS 441 +#define _LOAD_ATTR 442 +#define _LOAD_ATTR_CLASS 443 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 442 -#define _LOAD_ATTR_METHOD_LAZY_DICT 443 -#define _LOAD_ATTR_METHOD_NO_DICT 444 -#define _LOAD_ATTR_METHOD_WITH_VALUES 445 -#define _LOAD_ATTR_MODULE 446 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 447 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 448 -#define _LOAD_ATTR_PROPERTY_FRAME 449 -#define _LOAD_ATTR_SLOT 450 -#define _LOAD_ATTR_WITH_HINT 451 +#define _LOAD_ATTR_INSTANCE_VALUE 444 +#define _LOAD_ATTR_METHOD_LAZY_DICT 445 +#define _LOAD_ATTR_METHOD_NO_DICT 446 +#define _LOAD_ATTR_METHOD_WITH_VALUES 447 +#define _LOAD_ATTR_MODULE 448 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 449 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 450 +#define _LOAD_ATTR_PROPERTY_FRAME 451 +#define _LOAD_ATTR_SLOT 452 +#define _LOAD_ATTR_WITH_HINT 453 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 452 +#define _LOAD_BYTECODE 454 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 453 -#define _LOAD_CONST_INLINE_BORROW 454 -#define _LOAD_CONST_UNDER_INLINE 455 -#define _LOAD_CONST_UNDER_INLINE_BORROW 456 +#define _LOAD_CONST_INLINE 455 +#define _LOAD_CONST_INLINE_BORROW 456 +#define _LOAD_CONST_UNDER_INLINE 457 +#define _LOAD_CONST_UNDER_INLINE_BORROW 458 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 457 -#define _LOAD_FAST_0 458 -#define _LOAD_FAST_1 459 -#define _LOAD_FAST_2 460 -#define _LOAD_FAST_3 461 -#define _LOAD_FAST_4 462 -#define _LOAD_FAST_5 463 -#define _LOAD_FAST_6 464 -#define _LOAD_FAST_7 465 +#define _LOAD_FAST 459 +#define _LOAD_FAST_0 460 +#define _LOAD_FAST_1 461 +#define _LOAD_FAST_2 462 +#define _LOAD_FAST_3 463 +#define _LOAD_FAST_4 464 +#define _LOAD_FAST_5 465 +#define _LOAD_FAST_6 466 +#define _LOAD_FAST_7 467 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 466 -#define _LOAD_FAST_BORROW_0 467 -#define _LOAD_FAST_BORROW_1 468 -#define _LOAD_FAST_BORROW_2 469 -#define _LOAD_FAST_BORROW_3 470 -#define _LOAD_FAST_BORROW_4 471 -#define _LOAD_FAST_BORROW_5 472 -#define _LOAD_FAST_BORROW_6 473 -#define _LOAD_FAST_BORROW_7 474 +#define _LOAD_FAST_BORROW 468 +#define _LOAD_FAST_BORROW_0 469 +#define _LOAD_FAST_BORROW_1 470 +#define _LOAD_FAST_BORROW_2 471 +#define _LOAD_FAST_BORROW_3 472 +#define _LOAD_FAST_BORROW_4 473 +#define _LOAD_FAST_BORROW_5 474 +#define _LOAD_FAST_BORROW_6 475 +#define _LOAD_FAST_BORROW_7 476 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 475 -#define _LOAD_GLOBAL_BUILTINS 476 -#define _LOAD_GLOBAL_MODULE 477 +#define _LOAD_GLOBAL 477 +#define _LOAD_GLOBAL_BUILTINS 478 +#define _LOAD_GLOBAL_MODULE 479 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 478 -#define _LOAD_SMALL_INT_0 479 -#define _LOAD_SMALL_INT_1 480 -#define _LOAD_SMALL_INT_2 481 -#define _LOAD_SMALL_INT_3 482 -#define _LOAD_SPECIAL 483 +#define _LOAD_SMALL_INT 480 +#define _LOAD_SMALL_INT_0 481 +#define _LOAD_SMALL_INT_1 482 +#define _LOAD_SMALL_INT_2 483 +#define _LOAD_SMALL_INT_3 484 +#define _LOAD_SPECIAL 485 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 484 +#define _MAKE_CALLARGS_A_TUPLE 486 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 485 +#define _MAKE_WARM 487 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 486 -#define _MAYBE_EXPAND_METHOD_KW 487 -#define _MONITOR_CALL 488 -#define _MONITOR_CALL_KW 489 -#define _MONITOR_JUMP_BACKWARD 490 -#define _MONITOR_RESUME 491 +#define _MAYBE_EXPAND_METHOD 488 +#define _MAYBE_EXPAND_METHOD_KW 489 +#define _MONITOR_CALL 490 +#define _MONITOR_CALL_KW 491 +#define _MONITOR_JUMP_BACKWARD 492 +#define _MONITOR_RESUME 493 #define _NOP NOP -#define _POP_CALL 492 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 493 -#define _POP_CALL_ONE 494 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 495 -#define _POP_CALL_TWO 496 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 497 +#define _POP_CALL 494 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 495 +#define _POP_CALL_ONE 496 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 497 +#define _POP_CALL_TWO 498 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 499 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 498 -#define _POP_JUMP_IF_TRUE 499 +#define _POP_JUMP_IF_FALSE 500 +#define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 500 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 501 -#define _POP_TWO 502 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 503 +#define _POP_TOP_LOAD_CONST_INLINE 502 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 +#define _POP_TWO 504 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 504 +#define _PUSH_FRAME 506 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 505 -#define _PY_FRAME_GENERAL 506 -#define _PY_FRAME_KW 507 -#define _QUICKEN_RESUME 508 -#define _REPLACE_WITH_TRUE 509 +#define _PUSH_NULL_CONDITIONAL 507 +#define _PY_FRAME_GENERAL 508 +#define _PY_FRAME_KW 509 +#define _QUICKEN_RESUME 510 +#define _REPLACE_WITH_TRUE 511 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 510 -#define _SEND 511 -#define _SEND_GEN_FRAME 512 +#define _SAVE_RETURN_OFFSET 512 +#define _SEND 513 +#define _SEND_GEN_FRAME 514 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 513 -#define _STORE_ATTR 514 -#define _STORE_ATTR_INSTANCE_VALUE 515 -#define _STORE_ATTR_SLOT 516 -#define _STORE_ATTR_WITH_HINT 517 +#define _START_EXECUTOR 515 +#define _STORE_ATTR 516 +#define _STORE_ATTR_INSTANCE_VALUE 517 +#define _STORE_ATTR_SLOT 518 +#define _STORE_ATTR_WITH_HINT 519 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 518 -#define _STORE_FAST_0 519 -#define _STORE_FAST_1 520 -#define _STORE_FAST_2 521 -#define _STORE_FAST_3 522 -#define _STORE_FAST_4 523 -#define _STORE_FAST_5 524 -#define _STORE_FAST_6 525 -#define _STORE_FAST_7 526 +#define _STORE_FAST 520 +#define _STORE_FAST_0 521 +#define _STORE_FAST_1 522 +#define _STORE_FAST_2 523 +#define _STORE_FAST_3 524 +#define _STORE_FAST_4 525 +#define _STORE_FAST_5 526 +#define _STORE_FAST_6 527 +#define _STORE_FAST_7 528 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 527 -#define _STORE_SUBSCR 528 -#define _STORE_SUBSCR_DICT 529 -#define _STORE_SUBSCR_LIST_INT 530 -#define _SWAP 531 -#define _SWAP_2 532 -#define _SWAP_3 533 -#define _TIER2_RESUME_CHECK 534 -#define _TO_BOOL 535 +#define _STORE_SLICE 529 +#define _STORE_SUBSCR 530 +#define _STORE_SUBSCR_DICT 531 +#define _STORE_SUBSCR_LIST_INT 532 +#define _SWAP 533 +#define _SWAP_2 534 +#define _SWAP_3 535 +#define _TIER2_RESUME_CHECK 536 +#define _TO_BOOL 537 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 536 +#define _TO_BOOL_LIST 538 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 537 +#define _TO_BOOL_STR 539 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 538 -#define _UNPACK_SEQUENCE_LIST 539 -#define _UNPACK_SEQUENCE_TUPLE 540 -#define _UNPACK_SEQUENCE_TWO_TUPLE 541 +#define _UNPACK_SEQUENCE 540 +#define _UNPACK_SEQUENCE_LIST 541 +#define _UNPACK_SEQUENCE_TUPLE 542 +#define _UNPACK_SEQUENCE_TWO_TUPLE 543 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 541 +#define MAX_UOP_ID 543 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 40914c578c9038..11345a00785817 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -86,9 +86,11 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_INT] = HAS_EXIT_FLAG, [_GUARD_TOS_INT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_ADD_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_GUARD_NOS_OVERFLOWED] = HAS_EXIT_FLAG, + [_GUARD_TOS_OVERFLOWED] = HAS_EXIT_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, @@ -188,7 +190,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, - [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_COMPARE_OP_INT] = HAS_ARG_FLAG, [_COMPARE_OP_STR] = HAS_ARG_FLAG, [_IS_OP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -475,6 +477,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_NOS_LIST] = "_GUARD_NOS_LIST", [_GUARD_NOS_NOT_NULL] = "_GUARD_NOS_NOT_NULL", [_GUARD_NOS_NULL] = "_GUARD_NOS_NULL", + [_GUARD_NOS_OVERFLOWED] = "_GUARD_NOS_OVERFLOWED", [_GUARD_NOS_TUPLE] = "_GUARD_NOS_TUPLE", [_GUARD_NOS_UNICODE] = "_GUARD_NOS_UNICODE", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", @@ -486,6 +489,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT", [_GUARD_TOS_INT] = "_GUARD_TOS_INT", [_GUARD_TOS_LIST] = "_GUARD_TOS_LIST", + [_GUARD_TOS_OVERFLOWED] = "_GUARD_TOS_OVERFLOWED", [_GUARD_TOS_SLICE] = "_GUARD_TOS_SLICE", [_GUARD_TOS_TUPLE] = "_GUARD_TOS_TUPLE", [_GUARD_TOS_UNICODE] = "_GUARD_TOS_UNICODE", @@ -789,6 +793,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_TOS_INT: return 0; + case _GUARD_NOS_OVERFLOWED: + return 0; + case _GUARD_TOS_OVERFLOWED: + return 0; case _BINARY_OP_MULTIPLY_INT: return 2; case _BINARY_OP_ADD_INT: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 41833836e720ba..0fea0649d75709 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -407,12 +407,12 @@ def testfunc(n, m): x = 0 for i in range(m): for j in MyIter(n): - x += 1000*i + j + x += j return x - x = testfunc(TIER2_THRESHOLD, TIER2_THRESHOLD) + x = testfunc(TIER2_THRESHOLD, 2) - self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * TIER2_THRESHOLD * 1001) + self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * 2) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst new file mode 100644 index 00000000000000..089d00c77da25d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst @@ -0,0 +1,3 @@ +Changes specialization of ``BINARY_OP`` for ints to only specialize for +"compact" ints. This streamlines the fast path at the cost of fewer +specializations when very large integers are used. diff --git a/Objects/longobject.c b/Objects/longobject.c index dfa02851cd8877..59b10355ad9df8 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -10,6 +10,7 @@ #include "pycore_long.h" // _Py_SmallInts #include "pycore_object.h" // _PyObject_Init() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS +#include "pycore_stackref.h" #include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin() #include "pycore_unicodeobject.h" // _PyUnicode_Equal() @@ -316,6 +317,33 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } +/* Create a new medium int object from a medium int. + * Do not raise. Return NULL if not medium or can't allocate. */ +static inline _PyStackRef +medium_from_stwodigits(stwodigits x) +{ + if (IS_SMALL_INT(x)) { + return PyStackRef_FromPyObjectBorrow(get_small_int((sdigit)x)); + } + assert(x != 0); + if(!is_medium_int(x)) { + return PyStackRef_NULL; + } + PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); + if (v == NULL) { + v = PyObject_Malloc(sizeof(PyLongObject)); + if (v == NULL) { + return PyStackRef_NULL; + } + _PyObject_Init((PyObject*)v, &PyLong_Type); + } + digit abs_x = x < 0 ? -x : x; + _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); + v->long_value.ob_digit[0] = abs_x; + return PyStackRef_FromPyObjectStealMortal((PyObject *)v); +} + + /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ Py_LOCAL_INLINE(void) @@ -3771,12 +3799,12 @@ long_add(PyLongObject *a, PyLongObject *b) return z; } -PyObject * +_PyStackRef _PyCompactLong_Add(PyLongObject *a, PyLongObject *b) { assert(_PyLong_BothAreCompact(a, b)); - stwodigits z = medium_value(a) + medium_value(b); - return (PyObject *)_PyLong_FromSTwoDigits(z); + stwodigits v = medium_value(a) + medium_value(b); + return medium_from_stwodigits(v); } static PyObject * @@ -3816,11 +3844,12 @@ long_sub(PyLongObject *a, PyLongObject *b) return z; } -PyObject * +_PyStackRef _PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b) { assert(_PyLong_BothAreCompact(a, b)); - return (PyObject *)_PyLong_FromSTwoDigits(medium_value(a) - medium_value(b)); + stwodigits v = medium_value(a) - medium_value(b); + return medium_from_stwodigits(v); } static PyObject * @@ -4264,12 +4293,14 @@ long_mul(PyLongObject *a, PyLongObject *b) return z; } -PyObject * +/* This function returns NULL if the result is not compact, + * or if it fails to allocate, but never raises */ +_PyStackRef _PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b) { assert(_PyLong_BothAreCompact(a, b)); stwodigits v = medium_value(a) * medium_value(b); - return (PyObject *)_PyLong_FromSTwoDigits(v); + return medium_from_stwodigits(v); } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2dd1d27747a693..c83bc15c751510 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -569,12 +569,24 @@ dummy_func( op(_GUARD_NOS_INT, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - EXIT_IF(!PyLong_CheckExact(left_o)); + EXIT_IF(!PyLong_CheckCompact(left_o)); } op(_GUARD_TOS_INT, (value -- value)) { PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - EXIT_IF(!PyLong_CheckExact(value_o)); + EXIT_IF(!PyLong_CheckCompact(value_o)); + } + + op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + assert(Py_TYPE(left_o) == &PyLong_Type); + EXIT_IF(!_PyLong_IsCompact((PyLongObject *)left_o)); + } + + op(_GUARD_TOS_OVERFLOWED, (value -- value)) { + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + assert(Py_TYPE(value_o) == &PyLong_Type); + EXIT_IF(!_PyLong_IsCompact((PyLongObject *)value_o)); } pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { @@ -582,15 +594,14 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { @@ -598,15 +609,14 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { @@ -614,21 +624,22 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } macro(BINARY_OP_MULTIPLY_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_MULTIPLY_INT; + macro(BINARY_OP_ADD_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_ADD_INT; + macro(BINARY_OP_SUBTRACT_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT; @@ -2737,8 +2748,8 @@ dummy_func( PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left_o)); - DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right_o)); + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3b58e7fab5553d..b6338dd483b4a0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -852,7 +852,7 @@ _PyStackRef left; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!PyLong_CheckCompact(left_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -863,7 +863,31 @@ _PyStackRef value; value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_NOS_OVERFLOWED: { + _PyStackRef left; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + assert(Py_TYPE(left_o) == &PyLong_Type); + if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_TOS_OVERFLOWED: { + _PyStackRef value; + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + assert(Py_TYPE(value_o) == &PyLong_Type); + if (!_PyLong_IsCompact((PyLongObject *)value_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -880,20 +904,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -910,20 +929,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -940,20 +954,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -3807,14 +3816,8 @@ left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (!_PyLong_IsCompact((PyLongObject *)right_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4fc1d5266d0a87..82142aee9a3aea 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -158,7 +158,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -168,7 +168,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!PyLong_CheckCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -182,19 +182,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -486,7 +483,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -496,7 +493,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!PyLong_CheckCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -510,19 +507,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -700,7 +694,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -862,7 +856,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -940,7 +934,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1070,7 +1064,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1080,7 +1074,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!PyLong_CheckCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1094,19 +1088,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) { + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); } - STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -4902,7 +4893,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -4912,7 +4903,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!PyLong_CheckCompact(left_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -4924,16 +4915,8 @@ right = value; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { - UPDATE_MISS_STATS(COMPARE_OP); - assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); - JUMP_TO_PREDICTED(COMPARE_OP); - } - if (!_PyLong_IsCompact((PyLongObject *)right_o)) { - UPDATE_MISS_STATS(COMPARE_OP); - assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); - JUMP_TO_PREDICTED(COMPARE_OP); - } + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); @@ -11490,7 +11473,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!PyLong_CheckCompact(value_o)) { UPDATE_MISS_STATS(STORE_SUBSCR); assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR)); JUMP_TO_PREDICTED(STORE_SUBSCR); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index de337f637163ba..03ad8e1db24635 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -333,6 +333,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness #define frame_new _Py_uop_frame_new @@ -341,6 +342,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length #define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_compact_int _Py_uop_sym_is_compact_int +#define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness static int diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 07b05fc3323ece..f01c3787e5b4c7 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -27,6 +27,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new #define frame_pop _Py_uop_frame_pop @@ -34,6 +35,8 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length #define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_new_compact_int _Py_uop_sym_new_compact_int +#define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness extern int @@ -105,17 +108,27 @@ dummy_func(void) { } op(_GUARD_TOS_INT, (value -- value)) { - if (sym_matches_type(value, &PyLong_Type)) { + if (sym_is_compact_int(value)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(value, &PyLong_Type); + else { + if (sym_get_type(value) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(value); + } } op(_GUARD_NOS_INT, (left, unused -- left, unused)) { - if (sym_matches_type(left, &PyLong_Type)) { + if (sym_is_compact_int(left)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(left, &PyLong_Type); + else { + if (sym_get_type(left) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(left); + } } op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) { @@ -222,15 +235,15 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { @@ -434,6 +447,15 @@ dummy_func(void) { res = sym_new_truthiness(ctx, value, false); } + op(_UNARY_NEGATIVE, (value -- res)) { + if (sym_is_compact_int(value)) { + res = sym_new_compact_int(ctx); + } + else { + res = sym_new_not_null(ctx); + } + } + op(_UNARY_INVERT, (value -- res)) { if (sym_matches_type(value, &PyLong_Type)) { res = sym_new_type(ctx, &PyLong_Type); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 354331ef618f67..6bc506c6298f56 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -142,8 +142,15 @@ } case _UNARY_NEGATIVE: { + JitOptRef value; JitOptRef res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (sym_is_compact_int(value)) { + res = sym_new_compact_int(ctx); + } + else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } @@ -301,26 +308,44 @@ case _GUARD_NOS_INT: { JitOptRef left; left = stack_pointer[-2]; - if (sym_matches_type(left, &PyLong_Type)) { + if (sym_is_compact_int(left)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(left, &PyLong_Type); + else { + if (sym_get_type(left) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(left); + } break; } case _GUARD_TOS_INT: { JitOptRef value; value = stack_pointer[-1]; - if (sym_matches_type(value, &PyLong_Type)) { + if (sym_is_compact_int(value)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(value, &PyLong_Type); + else { + if (sym_get_type(value) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(value); + } + break; + } + + case _GUARD_NOS_OVERFLOWED: { + break; + } + + case _GUARD_TOS_OVERFLOWED: { break; } case _BINARY_OP_MULTIPLY_INT: { JitOptRef res; - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -329,7 +354,7 @@ case _BINARY_OP_ADD_INT: { JitOptRef res; - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -338,7 +363,7 @@ case _BINARY_OP_SUBTRACT_INT: { JitOptRef res; - res = sym_new_type(ctx, &PyLong_Type); + res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 838f25ad288ab0..8ae670d10280ec 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -30,17 +30,19 @@ we often skip in-between states for convenience: | | NULL | | | <- Anything below this level is an object. -| NON_NULL -| | | <- Anything below this level has a known type version. -| TYPE_VERSION | -| | | <- Anything below this level has a known type. -| KNOWN_CLASS | -| | | | <- Anything below this level has a known truthiness. -| | | TRUTHINESS -| | | | -| TUPLE | | -| | | | <- Anything below this level is a known constant. -| KNOWN_VALUE +| NON_NULL-+ +| | | <- Anything below this level has a known type version. +| TYPE_VERSION | +| | | <- Anything below this level has a known type. +| KNOWN_CLASS | +| | | | | | +| | | INT* | | +| | | | | | <- Anything below this level has a known truthiness. +| | | | | TRUTHINESS +| | | | | | +| TUPLE | | | | +| | | | | | <- Anything below this level is a known constant. +| KNOWN_VALUE--+ | | <- Anything below this level is unreachable. BOTTOM @@ -52,6 +54,8 @@ result of a truth test, which would allow us to narrow the symbol to KNOWN_VALUE the same symbol, that would be a contradiction, and the symbol would be set to BOTTOM (indicating that the code is unreachable). +INT* is a limited range int, currently a "compact" int. + */ #ifdef Py_DEBUG @@ -229,6 +233,11 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) sym_set_bottom(ctx, sym); } return; + case JIT_SYM_COMPACT_INT: + if (typ != &PyLong_Type) { + sym_set_bottom(ctx, sym); + } + return; } } @@ -286,6 +295,12 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int ver return false; } return true; + case JIT_SYM_COMPACT_INT: + if (version != PyLong_Type.tp_version_tag) { + sym_set_bottom(ctx, sym); + return false; + } + return true; } Py_UNREACHABLE(); } @@ -370,6 +385,14 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val) // TODO: More types (GH-130415)! make_const(sym, const_val); return; + case JIT_SYM_COMPACT_INT: + if (PyLong_CheckCompact(const_val)) { + make_const(sym, const_val); + } + else { + sym_set_bottom(ctx, sym); + } + return; } } @@ -477,6 +500,9 @@ _Py_uop_sym_get_type(JitOptRef ref) return &PyTuple_Type; case JIT_SYM_TRUTHINESS_TAG: return &PyBool_Type; + case JIT_SYM_COMPACT_INT: + return &PyLong_Type; + } Py_UNREACHABLE(); } @@ -502,6 +528,8 @@ _Py_uop_sym_get_type_version(JitOptRef ref) return PyTuple_Type.tp_version_tag; case JIT_SYM_TRUTHINESS_TAG: return PyBool_Type.tp_version_tag; + case JIT_SYM_COMPACT_INT: + return PyLong_Type.tp_version_tag; } Py_UNREACHABLE(); } @@ -535,6 +563,7 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref) case JIT_SYM_BOTTOM_TAG: case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: + case JIT_SYM_COMPACT_INT: return -1; case JIT_SYM_KNOWN_CLASS_TAG: /* TODO : @@ -645,6 +674,16 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) return false; } +bool +_Py_uop_sym_is_compact_int(JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { + return (bool)PyLong_CheckCompact(sym->value.value); + } + return sym->tag == JIT_SYM_COMPACT_INT; +} + bool _Py_uop_sym_is_immortal(JitOptRef ref) { @@ -652,6 +691,50 @@ _Py_uop_sym_is_immortal(JitOptRef ref) return _Py_uop_symbol_is_immortal(sym); } +void +_Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + JitSymType tag = sym->tag; + switch(tag) { + case JIT_SYM_NULL_TAG: + sym_set_bottom(ctx, sym); + return; + case JIT_SYM_KNOWN_CLASS_TAG: + if (sym->cls.type == &PyLong_Type) { + sym->tag = JIT_SYM_COMPACT_INT; + } else { + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_TYPE_VERSION_TAG: + if (sym->version.version == PyLong_Type.tp_version_tag) { + sym->tag = JIT_SYM_COMPACT_INT; + } + else { + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_KNOWN_VALUE_TAG: + if (!PyLong_CheckCompact(sym->value.value)) { + Py_CLEAR(sym->value.value); + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_TUPLE_TAG: + case JIT_SYM_TRUTHINESS_TAG: + sym_set_bottom(ctx, sym); + return; + case JIT_SYM_BOTTOM_TAG: + case JIT_SYM_COMPACT_INT: + return; + case JIT_SYM_NON_NULL_TAG: + case JIT_SYM_UNKNOWN_TAG: + sym->tag = JIT_SYM_COMPACT_INT; + return; + } +} + JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy) { @@ -677,6 +760,17 @@ _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy) return PyJitRef_Wrap(res); } +JitOptRef +_Py_uop_sym_new_compact_int(JitOptContext *ctx) +{ + JitOptSymbol *sym = sym_new(ctx); + if (sym == NULL) { + return out_of_space_ref(ctx); + } + sym->tag = JIT_SYM_COMPACT_INT; + return PyJitRef_Wrap(sym); +} + // 0 on success, -1 on error. _Py_UOpsAbstractFrame * _Py_uop_frame_new( @@ -796,6 +890,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) _Py_uop_abstractcontext_init(ctx); PyObject *val_42 = NULL; PyObject *val_43 = NULL; + PyObject *val_big = NULL; PyObject *tuple = NULL; // Use a single 'sym' variable so copy-pasting tests is easier. @@ -926,9 +1021,38 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == Py_False, "truthiness is not False"); TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == Py_True, "value is not True"); + + + val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66)); + if (val_big == NULL) { + goto fail; + } + + JitOptRef ref_42 = _Py_uop_sym_new_const(ctx, val_42); + JitOptRef ref_big = _Py_uop_sym_new_const(ctx, val_big); + JitOptRef ref_int = _Py_uop_sym_new_compact_int(ctx); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_42), "42 is not a compact int"); + TEST_PREDICATE(!_Py_uop_sym_is_compact_int(ref_big), "(1 << 66) is a compact int"); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int"); + + _Py_uop_sym_set_type(ctx, ref_int, &PyLong_Type); // Should have no effect + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int after cast"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int after cast"); + + _Py_uop_sym_set_type(ctx, ref_int, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref_int), "compact int cast to float isn't bottom"); + + ref_int = _Py_uop_sym_new_compact_int(ctx); + _Py_uop_sym_set_const(ctx, ref_int, val_43); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "43 is not a compact int"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "43 is not an int"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref_int) == val_43, "43 isn't 43"); + _Py_uop_abstractcontext_fini(ctx); Py_DECREF(val_42); Py_DECREF(val_43); + Py_DECREF(val_big); Py_DECREF(tuple); Py_RETURN_NONE; @@ -936,6 +1060,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) _Py_uop_abstractcontext_fini(ctx); Py_XDECREF(val_42); Py_XDECREF(val_43); + Py_XDECREF(val_big); Py_DECREF(tuple); return NULL; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2723e7ed7242ea..8020c9c5c8a887 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -687,6 +687,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsValid", "PyStackRef_Wrap", "PyStackRef_Unwrap", + "PyLong_CheckCompact", ) From ecd83e02b128bf0879d9bb1d3940e40bcb14bdc6 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 19 Jun 2025 12:46:33 +0100 Subject: [PATCH 616/989] gh-133439: Fix the error message in the sqlite3 CLI (GH-133807) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Serhiy Storchaka --- Lib/sqlite3/__main__.py | 6 +++--- Lib/test/test_sqlite3/test_cli.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index 9e74b49ee828bc..9443afbfcc273f 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -63,7 +63,7 @@ def runsource(self, source, filename="", symbol="single"): if source[0] == ".": match source[1:].strip(): case "version": - print(f"{sqlite3.sqlite_version}") + print(sqlite3.sqlite_version) case "help": print("Enter SQL code and press enter.") case "quit": @@ -72,8 +72,8 @@ def runsource(self, source, filename="", symbol="single"): pass case _ as unknown: t = theme.traceback - self.write(f'{t.type}Error{t.reset}:{t.message} unknown' - f'command or invalid arguments: "{unknown}".\n{t.reset}') + self.write(f'{t.type}Error{t.reset}: {t.message}unknown ' + f'command: "{unknown}"{t.reset}\n') else: if not sqlite3.complete_statement(source): return True diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index d993e28c4bb3a6..720fa3c4c1ea8b 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -138,7 +138,7 @@ def test_interact_dot_commands_unknown(self): self.assertEndsWith(out, self.PS1) self.assertEqual(out.count(self.PS1), 2) self.assertEqual(out.count(self.PS2), 0) - self.assertIn("Error", err) + self.assertIn('Error: unknown command: "', err) # test "unknown_command" is pointed out in the error message self.assertIn("unknown_command", err) From 13efe3f5993cb957fc0473a273dd7b02193e9e25 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 19 Jun 2025 12:47:29 +0100 Subject: [PATCH 617/989] gh-133934: Mention special commands in sqlite3 `.help` message (GH-135224) --- Lib/sqlite3/__main__.py | 6 +++++- .../Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index 9443afbfcc273f..35344ecceff526 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -65,7 +65,11 @@ def runsource(self, source, filename="", symbol="single"): case "version": print(sqlite3.sqlite_version) case "help": - print("Enter SQL code and press enter.") + t = theme.syntax + print(f"Enter SQL code or one of the below commands, and press enter.\n\n" + f"{t.builtin}.version{t.reset} Print underlying SQLite library version\n" + f"{t.builtin}.help{t.reset} Print this help message\n" + f"{t.builtin}.quit{t.reset} Exit the CLI, equivalent to CTRL-D\n") case "quit": sys.exit(0) case "": diff --git a/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst b/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst new file mode 100644 index 00000000000000..4de7b4cceca977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst @@ -0,0 +1 @@ +Improve :mod:`sqlite3` CLI's ``.help`` message. From 1b969f6d577bc1480b708e6604f708ab740f8715 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 19 Jun 2025 19:54:21 +0800 Subject: [PATCH 618/989] gh-135379: Add back const cast to _PyLong_IsCompact (GH-135706) --- Include/cpython/longintrepr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index 9f12298d4fcc37..19a57284e0edb1 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -125,9 +125,9 @@ _PyLong_IsCompact(const PyLongObject* op) { } static inline int -PyLong_CheckCompact(const PyObject *op) +PyLong_CheckCompact(PyObject *op) { - return PyLong_CheckExact(op) && _PyLong_IsCompact((PyLongObject *)op); + return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op); } #define PyUnstable_Long_IsCompact _PyLong_IsCompact From ff639af8eee11e7ca09999b2724bc10652a00e5d Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Thu, 19 Jun 2025 09:56:43 -0300 Subject: [PATCH 619/989] Docs: Emphasize parameter name in `pkgutil.iter_importers` (GH-135597) --- Doc/library/pkgutil.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index 20b8f6bcf19117..47d24b6f7d06bb 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -69,8 +69,8 @@ support. Yield :term:`finder` objects for the given module name. - If fullname contains a ``'.'``, the finders will be for the package - containing fullname, otherwise they will be all registered top level + If *fullname* contains a ``'.'``, the finders will be for the package + containing *fullname*, otherwise they will be all registered top level finders (i.e. those on both :data:`sys.meta_path` and :data:`sys.path_hooks`). If the named module is in a package, that package is imported as a side From 0243260284d3630d58a11694904476349d14a6ed Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 19 Jun 2025 21:09:09 +0800 Subject: [PATCH 620/989] gh-135379: Move PyLong_CheckCompact to private header and rename it (GH-135707) --- Include/cpython/longintrepr.h | 6 ------ Include/internal/pycore_long.h | 6 ++++++ Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 24 ++++++++++++------------ Python/optimizer_symbols.c | 6 +++--- Tools/cases_generator/analyzer.py | 2 +- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index 19a57284e0edb1..4b6f97a5e475d6 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -124,12 +124,6 @@ _PyLong_IsCompact(const PyLongObject* op) { return op->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS); } -static inline int -PyLong_CheckCompact(PyObject *op) -{ - return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op); -} - #define PyUnstable_Long_IsCompact _PyLong_IsCompact static inline Py_ssize_t diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 852d22bb8c5749..3c213783cd432b 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -312,6 +312,12 @@ _PyLong_FlipSign(PyLongObject *op) { #define _PyLong_FALSE_TAG TAG_FROM_SIGN_AND_SIZE(0, 0) #define _PyLong_TRUE_TAG TAG_FROM_SIGN_AND_SIZE(1, 1) +static inline int +_PyLong_CheckExactAndCompact(PyObject *op) +{ + return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op); +} + #ifdef __cplusplus } #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c83bc15c751510..307844d38ccfcc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -569,12 +569,12 @@ dummy_func( op(_GUARD_NOS_INT, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - EXIT_IF(!PyLong_CheckCompact(left_o)); + EXIT_IF(!_PyLong_CheckExactAndCompact(left_o)); } op(_GUARD_TOS_INT, (value -- value)) { PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - EXIT_IF(!PyLong_CheckCompact(value_o)); + EXIT_IF(!_PyLong_CheckExactAndCompact(value_o)); } op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b6338dd483b4a0..8f506172550afe 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -852,7 +852,7 @@ _PyStackRef left; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckCompact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -863,7 +863,7 @@ _PyStackRef value; value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 82142aee9a3aea..8f7932f0033c6f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -158,7 +158,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -168,7 +168,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckCompact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -483,7 +483,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -493,7 +493,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckCompact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -694,7 +694,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -856,7 +856,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -934,7 +934,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1064,7 +1064,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1074,7 +1074,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckCompact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -4893,7 +4893,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -4903,7 +4903,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckCompact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -11473,7 +11473,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckCompact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(STORE_SUBSCR); assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR)); JUMP_TO_PREDICTED(STORE_SUBSCR); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 8ae670d10280ec..64cc1b9074fcf0 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -386,7 +386,7 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val) make_const(sym, const_val); return; case JIT_SYM_COMPACT_INT: - if (PyLong_CheckCompact(const_val)) { + if (_PyLong_CheckExactAndCompact(const_val)) { make_const(sym, const_val); } else { @@ -679,7 +679,7 @@ _Py_uop_sym_is_compact_int(JitOptRef ref) { JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { - return (bool)PyLong_CheckCompact(sym->value.value); + return (bool)_PyLong_CheckExactAndCompact(sym->value.value); } return sym->tag == JIT_SYM_COMPACT_INT; } @@ -716,7 +716,7 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref) } return; case JIT_SYM_KNOWN_VALUE_TAG: - if (!PyLong_CheckCompact(sym->value.value)) { + if (!_PyLong_CheckExactAndCompact(sym->value.value)) { Py_CLEAR(sym->value.value); sym_set_bottom(ctx, sym); } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 8020c9c5c8a887..6ff0223d2ef3e7 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -687,7 +687,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsValid", "PyStackRef_Wrap", "PyStackRef_Unwrap", - "PyLong_CheckCompact", + "_PyLong_CheckExactAndCompact", ) From 7cc89496922b7edb033e2ed47550c7c9e2ae8525 Mon Sep 17 00:00:00 2001 From: Andrii Hrimov Date: Thu, 19 Jun 2025 16:47:35 +0200 Subject: [PATCH 621/989] gh-135273: Unify `ZoneInfo.from_file` signatures (#135274) Align `ZoneInfo.from_file` pure-Python signature with Argument Clinic signature. --- Doc/library/zoneinfo.rst | 4 ++-- Lib/zoneinfo/_zoneinfo.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/zoneinfo.rst b/Doc/library/zoneinfo.rst index a57f3b8b3e858c..53d8e2598ec1c7 100644 --- a/Doc/library/zoneinfo.rst +++ b/Doc/library/zoneinfo.rst @@ -195,7 +195,7 @@ The ``ZoneInfo`` class The ``ZoneInfo`` class has two alternate constructors: -.. classmethod:: ZoneInfo.from_file(fobj, /, key=None) +.. classmethod:: ZoneInfo.from_file(file_obj, /, key=None) Constructs a ``ZoneInfo`` object from a file-like object returning bytes (e.g. a file opened in binary mode or an :class:`io.BytesIO` object). @@ -325,7 +325,7 @@ The behavior of a ``ZoneInfo`` file depends on how it was constructed: >>> a is b False -3. ``ZoneInfo.from_file(fobj, /, key=None)``: When constructed from a file, the +3. ``ZoneInfo.from_file(file_obj, /, key=None)``: When constructed from a file, the ``ZoneInfo`` object raises an exception on pickling. If an end user wants to pickle a ``ZoneInfo`` constructed from a file, it is recommended that they use a wrapper type or a custom serialization function: either serializing by diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index b77dc0ed391bb3..3ffdb4c837192b 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -75,12 +75,12 @@ def _new_instance(cls, key): return obj @classmethod - def from_file(cls, fobj, /, key=None): + def from_file(cls, file_obj, /, key=None): obj = super().__new__(cls) obj._key = key obj._file_path = None - obj._load_file(fobj) - obj._file_repr = repr(fobj) + obj._load_file(file_obj) + obj._file_repr = repr(file_obj) # Disable pickling for objects created from files obj.__reduce__ = obj._file_reduce From 754190287ece5a2e66684161aadafb18f5f44868 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Thu, 19 Jun 2025 12:01:29 -0300 Subject: [PATCH 622/989] Docs: Add missing lines between regex and text (GH-134505) --- Doc/howto/regex.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index e543f6d5657d79..7486a378dbb06f 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -1016,7 +1016,9 @@ extension. This regular expression matches ``foo.bar`` and Now, consider complicating the problem a bit; what if you want to match filenames where the extension is not ``bat``? Some incorrect attempts: -``.*[.][^b].*$`` The first attempt above tries to exclude ``bat`` by requiring +``.*[.][^b].*$`` + +The first attempt above tries to exclude ``bat`` by requiring that the first character of the extension is not a ``b``. This is wrong, because the pattern also doesn't match ``foo.bar``. @@ -1043,7 +1045,9 @@ confusing. A negative lookahead cuts through all this confusion: -``.*[.](?!bat$)[^.]*$`` The negative lookahead means: if the expression ``bat`` +``.*[.](?!bat$)[^.]*$`` + +The negative lookahead means: if the expression ``bat`` doesn't match at this point, try the rest of the pattern; if ``bat$`` does match, the whole pattern will fail. The trailing ``$`` is required to ensure that something like ``sample.batch``, where the extension only starts with From 9c3c02019cf0bc7792bbdd3a314e45642178e3b5 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 19 Jun 2025 18:46:40 +0300 Subject: [PATCH 623/989] gh-135709: Fix two compile warnings on WASM buildbot (#135712) --- Modules/_testcapi/long.c | 2 +- Modules/_testcapimodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 42243023a45768..6313abf5485fff 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -228,7 +228,7 @@ pylongwriter_create(PyObject *module, PyObject *args) goto error; } - if (num < 0 || num >= PyLong_BASE) { + if (num < 0 || num >= (long)PyLong_BASE) { PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit"); goto error; } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 281c5b41137ac2..71fffedee146fa 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2424,7 +2424,7 @@ test_critical_sections(PyObject *module, PyObject *Py_UNUSED(args)) // Used by `finalize_thread_hang`. -#ifdef _POSIX_THREADS +#if defined(_POSIX_THREADS) && !defined(__wasi__) static void finalize_thread_hang_cleanup_callback(void *Py_UNUSED(arg)) { // Should not reach here. Py_FatalError("pthread thread termination was triggered unexpectedly"); From c76568339867422eca35876cabf82c06a55bbf56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Jun 2025 19:27:19 +0200 Subject: [PATCH 624/989] gh-135561: ensure that the GIL is held when handling an HACL* error in `_hmac` (#135562) --- ...-06-16-15-03-03.gh-issue-135561.mJCN8D.rst | 2 ++ Modules/hmacmodule.c | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst diff --git a/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst b/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst new file mode 100644 index 00000000000000..ee743f161138e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst @@ -0,0 +1,2 @@ +Fix a crash on DEBUG builds when an HACL* HMAC routine fails. Patch by +Bénédikt Tran. diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index b404d5732ec857..9c92d9d145f5a3 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -493,36 +493,41 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) static int _hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) { + assert(PyGILState_GetThisThreadState() != NULL); + if (code == Hacl_Streaming_Types_Success) { + return 0; + } + + PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { - case Hacl_Streaming_Types_Success: { - return 0; - } case Hacl_Streaming_Types_InvalidAlgorithm: { // only makes sense if an algorithm is known at call time assert(algorithm != NULL); assert(PyUnicode_CheckExact(algorithm)); PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm); - return -1; + break; } case Hacl_Streaming_Types_InvalidLength: { PyErr_SetString(PyExc_ValueError, "invalid length"); - return -1; + break; } case Hacl_Streaming_Types_MaximumLengthExceeded: { PyErr_SetString(PyExc_OverflowError, "maximum length exceeded"); - return -1; + break; } case Hacl_Streaming_Types_OutOfMemory: { PyErr_NoMemory(); - return -1; + break; } default: { PyErr_Format(PyExc_RuntimeError, "HACL* internal routine failed with error code: %d", code); - return -1; + break; } } + PyGILState_Release(gstate); + return -1; } /* From bb9596fcfa50eac31c728b23a614bcbbf0386cd9 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Thu, 19 Jun 2025 21:55:13 +0200 Subject: [PATCH 625/989] Add tomasr8 as a codeowner for AST, gettext and the Tier 2 optimizer (#135727) --- .github/CODEOWNERS | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7edadff2754e4d..08d7a80d7726d3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -45,8 +45,9 @@ Python/flowgraph.c @markshannon @iritkatriel Python/instruction_sequence.c @iritkatriel Python/bytecodes.c @markshannon Python/optimizer*.c @markshannon -Python/optimizer_analysis.c @Fidget-Spinner -Python/optimizer_bytecodes.c @Fidget-Spinner +Python/optimizer_analysis.c @Fidget-Spinner @tomasr8 +Python/optimizer_bytecodes.c @Fidget-Spinner @tomasr8 +Python/optimizer_symbols.c @tomasr8 Python/symtable.c @JelleZijlstra @carljm Lib/_pyrepl/* @pablogsal @lysnikolaou @ambv Lib/test/test_patma.py @brandtbucher @@ -188,13 +189,13 @@ Include/internal/pycore_time.h @pganssle @abalkin /Tools/cases_generator/ @markshannon # AST -Python/ast.c @isidentical @JelleZijlstra @eclips4 -Python/ast_preprocess.c @isidentical @eclips4 -Parser/asdl.py @isidentical @JelleZijlstra @eclips4 -Parser/asdl_c.py @isidentical @JelleZijlstra @eclips4 -Lib/ast.py @isidentical @JelleZijlstra @eclips4 -Lib/_ast_unparse.py @isidentical @JelleZijlstra @eclips4 -Lib/test/test_ast/ @eclips4 +Python/ast.c @isidentical @JelleZijlstra @eclips4 @tomasr8 +Python/ast_preprocess.c @isidentical @eclips4 @tomasr8 +Parser/asdl.py @isidentical @JelleZijlstra @eclips4 @tomasr8 +Parser/asdl_c.py @isidentical @JelleZijlstra @eclips4 @tomasr8 +Lib/ast.py @isidentical @JelleZijlstra @eclips4 @tomasr8 +Lib/_ast_unparse.py @isidentical @JelleZijlstra @eclips4 @tomasr8 +Lib/test/test_ast/ @eclips4 @tomasr8 # Mock /Lib/unittest/mock.py @cjw296 @@ -341,3 +342,6 @@ Modules/_xxtestfuzz/ @ammaraskar Python/remote_debug.h @pablogsal Python/remote_debugging.c @pablogsal Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 + +# gettext +**/*gettext* @tomasr8 From 1ddfe593200fec992d283a9b4d6ad2f1b535c018 Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Fri, 20 Jun 2025 04:23:38 +0800 Subject: [PATCH 626/989] gh-135543: Emit sys.remote_exec audit event when sys.remote_exec is called (GH-135544) --- Doc/library/sys.rst | 13 +++++++-- Lib/test/audit-tests.py | 28 +++++++++++++++++++ Lib/test/support/__init__.py | 22 +++++++++++++++ Lib/test/test_audit.py | 8 ++++++ Lib/test/test_sys.py | 19 ++----------- ...-06-16-02-31-42.gh-issue-135543.6b0HOF.rst | 2 ++ Python/ceval_gil.c | 2 +- Python/sysmodule.c | 5 ++++ 8 files changed, 79 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 71f9999464ab52..f8f727f4a23410 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1933,12 +1933,21 @@ always available. Unless explicitly noted otherwise, all variables are read-only interpreter is pre-release (alpha, beta, or release candidate) then the local and remote interpreters must be the same exact version. - .. audit-event:: remote_debugger_script script_path + .. audit-event:: sys.remote_exec pid script_path + + When the code is executed in the remote process, an + :ref:`auditing event ` ``sys.remote_exec`` is raised with + the *pid* and the path to the script file. + This event is raised in the process that called :func:`sys.remote_exec`. + + .. audit-event:: cpython.remote_debugger_script script_path When the script is executed in the remote process, an :ref:`auditing event ` - ``sys.remote_debugger_script`` is raised + ``cpython.remote_debugger_script`` is raised with the path in the remote process. + This event is raised in the remote process, not the one + that called :func:`sys.remote_exec`. .. availability:: Unix, Windows. .. versionadded:: 3.14 diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index 08b638e4b8d524..6884ac0dbe6ff0 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -643,6 +643,34 @@ def test_assert_unicode(): else: raise RuntimeError("Expected sys.audit(9) to fail.") +def test_sys_remote_exec(): + import tempfile + + pid = os.getpid() + event_pid = -1 + event_script_path = "" + remote_event_script_path = "" + def hook(event, args): + if event not in ["sys.remote_exec", "cpython.remote_debugger_script"]: + return + print(event, args) + match event: + case "sys.remote_exec": + nonlocal event_pid, event_script_path + event_pid = args[0] + event_script_path = args[1] + case "cpython.remote_debugger_script": + nonlocal remote_event_script_path + remote_event_script_path = args[0] + + sys.addaudithook(hook) + with tempfile.NamedTemporaryFile(mode='w+', delete=True) as tmp_file: + tmp_file.write("a = 1+1\n") + tmp_file.flush() + sys.remote_exec(pid, tmp_file.name) + assertEqual(event_pid, pid) + assertEqual(event_script_path, tmp_file.name) + assertEqual(remote_event_script_path, tmp_file.name) if __name__ == "__main__": from test.support import suppress_msvcrt_asserts diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 48e74adcce3bb7..51c0ce11e8269d 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -46,6 +46,7 @@ # sys "MS_WINDOWS", "is_jython", "is_android", "is_emscripten", "is_wasi", "is_apple_mobile", "check_impl_detail", "unix_shell", "setswitchinterval", + "support_remote_exec_only", # os "get_pagesize", # network @@ -3069,6 +3070,27 @@ def is_libssl_fips_mode(): return False # more of a maybe, unless we add this to the _ssl module. return get_fips_mode() != 0 +def _supports_remote_attaching(): + PROCESS_VM_READV_SUPPORTED = False + + try: + from _remote_debugging import PROCESS_VM_READV_SUPPORTED + except ImportError: + pass + + return PROCESS_VM_READV_SUPPORTED + +def _support_remote_exec_only_impl(): + if not sys.is_remote_debug_enabled(): + return unittest.skip("Remote debugging is not enabled") + if sys.platform not in ("darwin", "linux", "win32"): + return unittest.skip("Test only runs on Linux, Windows and macOS") + if sys.platform == "linux" and not _supports_remote_attaching(): + return unittest.skip("Test only runs on Linux with process_vm_readv support") + return _id + +def support_remote_exec_only(test): + return _support_remote_exec_only_impl()(test) class EqualToForwardRef: """Helper to ease use of annotationlib.ForwardRef in tests. diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py index 5f9eb381f605d9..077765fcda210a 100644 --- a/Lib/test/test_audit.py +++ b/Lib/test/test_audit.py @@ -322,6 +322,14 @@ def test_assert_unicode(self): if returncode: self.fail(stderr) + @support.support_remote_exec_only + @support.cpython_only + def test_sys_remote_exec(self): + returncode, events, stderr = self.run_python("test_sys_remote_exec") + self.assertTrue(any(["sys.remote_exec" in event for event in events])) + self.assertTrue(any(["cpython.remote_debugger_script" in event for event in events])) + if returncode: + self.fail(stderr) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 39e62027f03e5a..73a72024bba84d 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1943,22 +1943,7 @@ def write(self, s): self.assertEqual(out, b"") self.assertEqual(err, b"") - -def _supports_remote_attaching(): - PROCESS_VM_READV_SUPPORTED = False - - try: - from _remote_debugging import PROCESS_VM_READV_SUPPORTED - except ImportError: - pass - - return PROCESS_VM_READV_SUPPORTED - -@unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled") -@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32", - "Test only runs on Linux, Windows and MacOS") -@unittest.skipIf(sys.platform == "linux" and not _supports_remote_attaching(), - "Test only runs on Linux with process_vm_readv support") +@test.support.support_remote_exec_only @test.support.cpython_only class TestRemoteExec(unittest.TestCase): def tearDown(self): @@ -2117,7 +2102,7 @@ def audit_hook(event, arg): returncode, stdout, stderr = self._run_remote_exec_test(script, prologue=prologue) self.assertEqual(returncode, 0) self.assertIn(b"Remote script executed successfully!", stdout) - self.assertIn(b"Audit event: remote_debugger_script, arg: ", stdout) + self.assertIn(b"Audit event: cpython.remote_debugger_script, arg: ", stdout) self.assertEqual(stderr, b"") def test_remote_exec_with_exception(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst new file mode 100644 index 00000000000000..6efe2a47bac5d4 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst @@ -0,0 +1,2 @@ +Emit ``sys.remote_exec`` audit event when :func:`sys.remote_exec` is called +and migrate ``remote_debugger_script`` to ``cpython.remote_debugger_script``. diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 6d2383ac7c1c65..57d8f68b000b60 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1220,7 +1220,7 @@ static inline int run_remote_debugger_source(PyObject *source) // that would be an easy target for a ROP gadget. static inline void run_remote_debugger_script(PyObject *path) { - if (0 != PySys_Audit("remote_debugger_script", "O", path)) { + if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) { PyErr_FormatUnraisable( "Audit hook failed for remote debugger script %U", path); return; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index e5ae841d195d4f..b3a2512a99d2fc 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2488,6 +2488,11 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) if (PyUnicode_FSConverter(script, &path) == 0) { return NULL; } + + if (PySys_Audit("sys.remote_exec", "iO", pid, script) < 0) { + return NULL; + } + debugger_script_path = PyBytes_AS_STRING(path); #ifdef MS_WINDOWS PyObject *unicode_path; From c8c13f8036051c2858956905769b7b9dfde4bbd7 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 19 Jun 2025 14:27:02 -0700 Subject: [PATCH 627/989] gh-127146: Add skip_emscripten_stack_overflow in a few places (#135722) More tests that hit stack limits on some platforms. --- Lib/test/test_exceptions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 175ef53138689e..57d0656487d4db 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1445,6 +1445,7 @@ def foo(): foo() support.gc_collect() + @support.skip_emscripten_stack_overflow() @cpython_only def test_recursion_normalizing_exception(self): import_module("_testinternalcapi") @@ -1522,6 +1523,7 @@ def test_recursion_normalizing_infinite_exception(self): self.assertIn(b'Done.', out) + @support.skip_emscripten_stack_overflow() def test_recursion_in_except_handler(self): def set_relative_recursion_limit(n): From b53b0c14da58b73955b8989e4a9ce19ea67db7a3 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 20 Jun 2025 14:33:35 +0800 Subject: [PATCH 628/989] gh-135608: Add a null check for attribute promotion to fix a JIT crash (GH-135613) Co-authored-by: devdanzin <74280297+devdanzin@users.noreply.github.com> --- Lib/test/test_capi/test_opt.py | 30 +++++++++++++++++++ ...-06-17-12-50-48.gh-issue-135608.PnHckD.rst | 1 + Python/optimizer_analysis.c | 4 +++ Python/optimizer_bytecodes.c | 8 ++++- Python/optimizer_cases.c.h | 7 ++++- 5 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0fea0649d75709..2b6934d747ebe0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2319,6 +2319,36 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_INT", uops) self.assertNotIn("_GUARD_NOS_INT", uops) + def test_attr_promotion_failure(self): + # We're not testing for any specific uops here, just + # testing it doesn't crash. + script_helper.assert_python_ok('-c', textwrap.dedent(""" + import _testinternalcapi + import _opcode + import email + + def get_first_executor(func): + code = func.__code__ + co_code = code.co_code + for i in range(0, len(co_code), 2): + try: + return _opcode.get_executor(code, i) + except ValueError: + pass + return None + + def testfunc(n): + for _ in range(n): + email.jit_testing = None + prompt = email.jit_testing + del email.jit_testing + + + testfunc(_testinternalcapi.TIER2_THRESHOLD) + ex = get_first_executor(testfunc) + assert ex is not None + """)) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst new file mode 100644 index 00000000000000..a65a0c85fa64fa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst @@ -0,0 +1 @@ +Fix a crash in the JIT involving attributes of modules. diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 03ad8e1db24635..8b1a63e3d2916f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -103,6 +103,10 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop) if ((int)index >= dict->ma_keys->dk_nentries) { return NULL; } + PyDictKeysObject *keys = dict->ma_keys; + if (keys->dk_version != inst->operand0) { + return NULL; + } PyObject *res = entries[index].me_value; if (res == NULL) { return NULL; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f01c3787e5b4c7..2e240830013a46 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -591,7 +591,13 @@ dummy_func(void) { PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res == NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } + } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 6bc506c6298f56..3cabf619fe81e7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1266,7 +1266,12 @@ PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res == NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } } } } From 2dbada179f1c7cfcfffceac4ac91902154b5a356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 20 Jun 2025 10:57:16 +0200 Subject: [PATCH 629/989] gh-135532: simplify handling of HACL* errors in `_hmac` (#135740) --- Modules/hmacmodule.c | 45 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 9c92d9d145f5a3..f6ed35ae742905 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -234,24 +234,24 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, uint32_t, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, uint32_t, (C statements)) */ #ifndef NDEBUG #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code, (ALGORITHM)) < 0) { \ + if (_hacl_convert_errno(code) < 0) { \ ERRACTION; \ } \ } while (0) #else #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) \ do { \ (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ @@ -274,17 +274,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ BUF += UINT32_MAX; \ LEN -= UINT32_MAX; \ } \ @@ -292,7 +292,7 @@ typedef struct py_hmac_hacl_api { #else #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) #endif @@ -301,17 +301,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #define Py_HMAC_HACL_UPDATE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ } while (0) /* @@ -491,7 +491,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) * Otherwise, this sets an appropriate exception and returns -1. */ static int -_hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) +_hacl_convert_errno(hacl_errno_t code) { assert(PyGILState_GetThisThreadState() != NULL); if (code == Hacl_Streaming_Types_Success) { @@ -501,10 +501,7 @@ _hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { case Hacl_Streaming_Types_InvalidAlgorithm: { - // only makes sense if an algorithm is known at call time - assert(algorithm != NULL); - assert(PyUnicode_CheckExact(algorithm)); - PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm); + PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm"); break; } case Hacl_Streaming_Types_InvalidLength: { @@ -521,7 +518,7 @@ _hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) } default: { PyErr_Format(PyExc_RuntimeError, - "HACL* internal routine failed with error code: %d", + "HACL* internal routine failed with error code: %u", code); break; } @@ -541,7 +538,7 @@ _hacl_hmac_state_new(HMAC_Hash_Kind kind, uint8_t *key, uint32_t len) assert(kind != Py_hmac_kind_hash_unknown); HACL_HMAC_state *state = NULL; hacl_errno_t retcode = Hacl_Streaming_HMAC_malloc_(kind, key, len, &state); - if (_hacl_convert_errno(retcode, NULL) < 0) { + if (_hacl_convert_errno(retcode) < 0) { assert(state == NULL); return NULL; } @@ -809,13 +806,13 @@ hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) } if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, return -1); + Py_HMAC_HACL_UPDATE(self->state, msg, len, return -1); return 0; } int res = 0; Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, msg, len, goto error); goto done; #ifndef NDEBUG error: @@ -983,7 +980,7 @@ hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) int res = 0; Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); goto done; #ifndef NDEBUG error: @@ -1010,7 +1007,7 @@ static int hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) { ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); LEAVE_HASHLIB(self); return 0; @@ -1081,7 +1078,7 @@ hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory ); - return _hacl_convert_errno(rc, NULL); + return _hacl_convert_errno(rc); } /*[clinic input] From 82726600be84773c9cca6febdead7a804f4a68bd Mon Sep 17 00:00:00 2001 From: Chris Eibl <138194463+chris-eibl@users.noreply.github.com> Date: Fri, 20 Jun 2025 11:05:33 +0200 Subject: [PATCH 630/989] gh-135379: fix MSVC warning: conversion from 'stwodigits' to 'digit' (GH-135714) fix warning C4244: 'initializing': conversion from 'stwodigits' to 'digit', possible loss of data --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 59b10355ad9df8..557bb6e1dd956c 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -337,7 +337,7 @@ medium_from_stwodigits(stwodigits x) } _PyObject_Init((PyObject*)v, &PyLong_Type); } - digit abs_x = x < 0 ? -x : x; + digit abs_x = x < 0 ? (digit)(-x) : (digit)x; _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); v->long_value.ob_digit[0] = abs_x; return PyStackRef_FromPyObjectStealMortal((PyObject *)v); From 57dba7c9a59ab345f859810bad6608a9d1a0fdd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 20 Jun 2025 11:32:38 +0200 Subject: [PATCH 631/989] gh-135532: update `Modules/_hacl/python_hacl_namespaces.h` (#135741) --- Misc/sbom.spdx.json | 4 +- Modules/_hacl/python_hacl_namespaces.h | 237 ++++++++++++------------- 2 files changed, 111 insertions(+), 130 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 10a5c646d2a59e..738b33908857a8 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -930,11 +930,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4a0bdb9496d49bbfa3ad50bb7854d8f099e84891" + "checksumValue": "682b069d3888f3e8f8cc90f1a49bac9d1a5903d2" }, { "algorithm": "SHA256", - "checksumValue": "b1a45149239ee7af7de769a3e9339950d47c199bb9eaa10edce8a00fde603b12" + "checksumValue": "8551d2c3fde03b92c6fab5febde00347bd9184ea0085077976863c7836e9669d" } ], "fileName": "Modules/_hacl/python_hacl_namespaces.h" diff --git a/Modules/_hacl/python_hacl_namespaces.h b/Modules/_hacl/python_hacl_namespaces.h index 1c2f7fea5c837a..d0b4500395e7c3 100644 --- a/Modules/_hacl/python_hacl_namespaces.h +++ b/Modules/_hacl/python_hacl_namespaces.h @@ -2,95 +2,43 @@ #define _PYTHON_HACL_NAMESPACES_H /* - * C's excuse for namespaces: Use globally unique names to avoid linkage - * conflicts with builds linking or dynamically loading other code potentially - * using HACL* libraries. + * Use globally unique names to avoid linkage conflicts with builds linking + * or dynamically loading other code potentially using HACL* libraries. * - * Something like this to generate new entries for the list: - * - * nm *.o | grep Hacl | cut -c 20- | sort | uniq | grep -v _Py_LibHacl_ | egrep ^_ | sed 's/_\(.*\)/#define \1 _Py_LibHacl_\1/g' - */ + * Assuming that the current working directory is Modules/_hacl, + * use the following command to generate a list of candidates: -#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 + nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' \ + | sort -u -#define Hacl_Hash_SHA2_state_sha2_224_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224_s -#define Hacl_Hash_SHA2_state_sha2_224 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224 -#define Hacl_Hash_SHA2_state_sha2_256 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_256 -#define Hacl_Hash_SHA2_state_sha2_384_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384_s -#define Hacl_Hash_SHA2_state_sha2_384 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384 -#define Hacl_Hash_SHA2_state_sha2_512 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_512 -#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 -#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 -#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 -#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 -#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 -#define Hacl_Hash_SHA2_copy_224 _Py_LibHacl_Hacl_Hash_SHA2_copy_224 -#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 -#define Hacl_Hash_SHA2_copy_384 _Py_LibHacl_Hacl_Hash_SHA2_copy_384 -#define Hacl_Hash_SHA2_init_256 _Py_LibHacl_Hacl_Hash_SHA2_init_256 -#define Hacl_Hash_SHA2_init_224 _Py_LibHacl_Hacl_Hash_SHA2_init_224 -#define Hacl_Hash_SHA2_init_512 _Py_LibHacl_Hacl_Hash_SHA2_init_512 -#define Hacl_Hash_SHA2_init_384 _Py_LibHacl_Hacl_Hash_SHA2_init_384 -#define Hacl_SHA2_Scalar32_sha512_init _Py_LibHacl_Hacl_SHA2_Scalar32_sha512_init -#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 -#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 -#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 -#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 -#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 -#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 -#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 -#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 -#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 -#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 -#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 -#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 -#define Hacl_Hash_SHA2_sha256 _Py_LibHacl_Hacl_Hash_SHA2_sha256 -#define Hacl_Hash_SHA2_sha224 _Py_LibHacl_Hacl_Hash_SHA2_sha224 -#define Hacl_Hash_SHA2_sha512 _Py_LibHacl_Hacl_Hash_SHA2_sha512 -#define Hacl_Hash_SHA2_sha384 _Py_LibHacl_Hacl_Hash_SHA2_sha384 + * Compare the entries to add as follows: -#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc -#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init -#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update -#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest -#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free -#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy -#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash - -#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc -#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init -#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update -#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest -#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free -#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy -#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash - -#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 -#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 -#define Hacl_Impl_SHA3_absorb_inner _Py_LibHacl_Hacl_Impl_SHA3_absorb_inner -#define Hacl_Impl_SHA3_keccak _Py_LibHacl_Hacl_Impl_SHA3_keccak -#define Hacl_Impl_SHA3_loadState _Py_LibHacl_Hacl_Impl_SHA3_loadState -#define Hacl_Impl_SHA3_squeeze _Py_LibHacl_Hacl_Impl_SHA3_squeeze -#define Hacl_Impl_SHA3_state_permute _Py_LibHacl_Hacl_Impl_SHA3_state_permute -#define Hacl_SHA3_sha3_224 _Py_LibHacl_Hacl_SHA3_sha3_224 -#define Hacl_SHA3_sha3_256 _Py_LibHacl_Hacl_SHA3_sha3_256 -#define Hacl_SHA3_sha3_384 _Py_LibHacl_Hacl_SHA3_sha3_384 -#define Hacl_SHA3_sha3_512 _Py_LibHacl_Hacl_SHA3_sha3_512 -#define Hacl_SHA3_shake128_hacl _Py_LibHacl_Hacl_SHA3_shake128_hacl -#define Hacl_SHA3_shake256_hacl _Py_LibHacl_Hacl_SHA3_shake256_hacl -#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len -#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy -#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest -#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free -#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg -#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len -#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake -#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ -#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc -#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset -#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update -#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze + diff -y --suppress-common-lines \ + <(grep -P '^#define (?!_PY.+_H)' python_hacl_namespaces.h | sort -u) \ + <(nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' | sort -u) + */ +// --- Utils ------------------------------------------------------------------ +#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 +// --- HASH-BLAKE-2b ---------------------------------------------------------- +#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy +#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest +#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish +#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free +#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key +#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params +#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info +#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init +#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc +#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key +#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key +#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset +#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key +#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_Simd256_copy _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy #define Hacl_Hash_Blake2b_Simd256_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy_internal_state #define Hacl_Hash_Blake2b_Simd256_digest _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_digest @@ -104,7 +52,6 @@ #define Hacl_Hash_Blake2b_Simd256_malloc _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc #define Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key #define Hacl_Hash_Blake2b_Simd256_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key -#define Hacl_Hash_Blake2b_Simd256_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key0 #define Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key #define Hacl_Hash_Blake2b_Simd256_reset _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset #define Hacl_Hash_Blake2b_Simd256_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset_with_key @@ -115,23 +62,24 @@ #define Hacl_Hash_Blake2b_Simd256_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_last_no_inline #define Hacl_Hash_Blake2b_Simd256_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi #define Hacl_Hash_Blake2b_Simd256_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi_no_inline -#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy -#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest -#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish -#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free -#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key -#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params -#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info -#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init -#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc -#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key -#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key -#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset -#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key -#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_update _Py_LibHacl_Hacl_Hash_Blake2b_update #define Hacl_Hash_Blake2b_update_last _Py_LibHacl_Hacl_Hash_Blake2b_update_last #define Hacl_Hash_Blake2b_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_update_multi +// --- HASH-BLAKE-2s ---------------------------------------------------------- +#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy +#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest +#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish +#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free +#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key +#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params +#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info +#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init +#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc +#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key +#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key +#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset +#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key +#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_Simd128_copy _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy #define Hacl_Hash_Blake2s_Simd128_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy_internal_state #define Hacl_Hash_Blake2s_Simd128_digest _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_digest @@ -145,7 +93,6 @@ #define Hacl_Hash_Blake2s_Simd128_malloc _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc #define Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key #define Hacl_Hash_Blake2s_Simd128_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key -#define Hacl_Hash_Blake2s_Simd128_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key0 #define Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key #define Hacl_Hash_Blake2s_Simd128_reset _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset #define Hacl_Hash_Blake2s_Simd128_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset_with_key @@ -156,37 +103,54 @@ #define Hacl_Hash_Blake2s_Simd128_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_last_no_inline #define Hacl_Hash_Blake2s_Simd128_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi #define Hacl_Hash_Blake2s_Simd128_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi_no_inline -#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy -#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest -#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish -#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free -#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key -#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params -#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info -#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init -#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc -#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key -#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key -#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset -#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key -#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_update _Py_LibHacl_Hacl_Hash_Blake2s_update #define Hacl_Hash_Blake2s_update_last _Py_LibHacl_Hacl_Hash_Blake2s_update_last #define Hacl_Hash_Blake2s_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_update_multi +// --- HASH-MD5 --------------------------------------------------------------- +#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy +#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest #define Hacl_Hash_MD5_finish _Py_LibHacl_Hacl_Hash_MD5_finish +#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free +#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash #define Hacl_Hash_MD5_hash_oneshot _Py_LibHacl_Hacl_Hash_MD5_hash_oneshot +#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init +#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc #define Hacl_Hash_MD5_reset _Py_LibHacl_Hacl_Hash_MD5_reset +#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update #define Hacl_Hash_MD5_update_last _Py_LibHacl_Hacl_Hash_MD5_update_last #define Hacl_Hash_MD5_update_multi _Py_LibHacl_Hacl_Hash_MD5_update_multi +// --- HASH-SHA-1 ------------------------------------------------------------- +#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy +#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest #define Hacl_Hash_SHA1_finish _Py_LibHacl_Hacl_Hash_SHA1_finish +#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free +#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash #define Hacl_Hash_SHA1_hash_oneshot _Py_LibHacl_Hacl_Hash_SHA1_hash_oneshot +#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init +#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc #define Hacl_Hash_SHA1_reset _Py_LibHacl_Hacl_Hash_SHA1_reset +#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update #define Hacl_Hash_SHA1_update_last _Py_LibHacl_Hacl_Hash_SHA1_update_last #define Hacl_Hash_SHA1_update_multi _Py_LibHacl_Hacl_Hash_SHA1_update_multi +// --- HASH-SHA-2 ------------------------------------------------------------- +#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 +#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 +#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 +#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 +#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 +#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 +#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 +#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 +#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 +#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 #define Hacl_Hash_SHA2_hash_224 _Py_LibHacl_Hacl_Hash_SHA2_hash_224 #define Hacl_Hash_SHA2_hash_256 _Py_LibHacl_Hacl_Hash_SHA2_hash_256 #define Hacl_Hash_SHA2_hash_384 _Py_LibHacl_Hacl_Hash_SHA2_hash_384 #define Hacl_Hash_SHA2_hash_512 _Py_LibHacl_Hacl_Hash_SHA2_hash_512 +#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 +#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 +#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 +#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 #define Hacl_Hash_SHA2_reset_224 _Py_LibHacl_Hacl_Hash_SHA2_reset_224 #define Hacl_Hash_SHA2_reset_256 _Py_LibHacl_Hacl_Hash_SHA2_reset_256 #define Hacl_Hash_SHA2_reset_384 _Py_LibHacl_Hacl_Hash_SHA2_reset_384 @@ -207,10 +171,25 @@ #define Hacl_Hash_SHA2_sha512_init _Py_LibHacl_Hacl_Hash_SHA2_sha512_init #define Hacl_Hash_SHA2_sha512_update_last _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_last #define Hacl_Hash_SHA2_sha512_update_nblocks _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_nblocks +#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 +#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 +#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 +#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 +// --- HASH-SHA-3 ------------------------------------------------------------- #define Hacl_Hash_SHA3_absorb_inner_32 _Py_LibHacl_Hacl_Hash_SHA3_absorb_inner_32 +#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len +#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy +#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest +#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free +#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg +#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len +#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ +#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake #define Hacl_Hash_SHA3_keccak_piln _Py_LibHacl_Hacl_Hash_SHA3_keccak_piln #define Hacl_Hash_SHA3_keccak_rndc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rndc #define Hacl_Hash_SHA3_keccak_rotc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rotc +#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc +#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset #define Hacl_Hash_SHA3_sha3_224 _Py_LibHacl_Hacl_Hash_SHA3_sha3_224 #define Hacl_Hash_SHA3_sha3_256 _Py_LibHacl_Hacl_Hash_SHA3_sha3_256 #define Hacl_Hash_SHA3_sha3_384 _Py_LibHacl_Hacl_Hash_SHA3_sha3_384 @@ -220,37 +199,39 @@ #define Hacl_Hash_SHA3_shake128_absorb_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_absorb_nblocks #define Hacl_Hash_SHA3_shake128_squeeze_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_squeeze_nblocks #define Hacl_Hash_SHA3_shake256 _Py_LibHacl_Hacl_Hash_SHA3_shake256 +#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze #define Hacl_Hash_SHA3_state_free _Py_LibHacl_Hacl_Hash_SHA3_state_free #define Hacl_Hash_SHA3_state_malloc _Py_LibHacl_Hacl_Hash_SHA3_state_malloc - -// Streaming HMAC +#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update +#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 +#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 +// --- STREAMING-MAC ---------------------------------------------------------- +#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy +#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest +#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free +#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_index_of_state _Py_LibHacl_Hacl_Streaming_HMAC_index_of_state #define Hacl_Streaming_HMAC_malloc_ _Py_LibHacl_Hacl_Streaming_HMAC_malloc_ -#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_reset _Py_LibHacl_Hacl_Streaming_HMAC_reset -#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update -#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest -#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy -#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free #define Hacl_Streaming_HMAC_s1 _Py_LibHacl_Hacl_Streaming_HMAC_s1 #define Hacl_Streaming_HMAC_s2 _Py_LibHacl_Hacl_Streaming_HMAC_s2 - -// HMAC-MD5 +#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update +// --- HMAC-MD5 --------------------------------------------------------------- #define Hacl_HMAC_compute_md5 _Py_LibHacl_Hacl_HMAC_compute_md5 -// HMAC-SHA-1 +// --- HMAC-SHA-1 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha1 _Py_LibHacl_Hacl_HMAC_compute_sha1 -// HMAC-SHA-2 +// --- HMAC-SHA-2 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha2_224 _Py_LibHacl_Hacl_HMAC_compute_sha2_224 #define Hacl_HMAC_compute_sha2_256 _Py_LibHacl_Hacl_HMAC_compute_sha2_256 #define Hacl_HMAC_compute_sha2_384 _Py_LibHacl_Hacl_HMAC_compute_sha2_384 #define Hacl_HMAC_compute_sha2_512 _Py_LibHacl_Hacl_HMAC_compute_sha2_512 -// HMAC-SHA-3 +// --- HMAC-SHA-3 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha3_224 _Py_LibHacl_Hacl_HMAC_compute_sha3_224 #define Hacl_HMAC_compute_sha3_256 _Py_LibHacl_Hacl_HMAC_compute_sha3_256 #define Hacl_HMAC_compute_sha3_384 _Py_LibHacl_Hacl_HMAC_compute_sha3_384 #define Hacl_HMAC_compute_sha3_512 _Py_LibHacl_Hacl_HMAC_compute_sha3_512 -// HMAC-BLAKE -#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 +// --- HMAC-BLAKE-2 ----------------------------------------------------------- #define Hacl_HMAC_compute_blake2b_32 _Py_LibHacl_Hacl_HMAC_compute_blake2b_32 +#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 #endif // _PYTHON_HACL_NAMESPACES_H From eec7a8ff22dcf409717a21a9aeab28b55526ee24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 20 Jun 2025 11:50:09 +0200 Subject: [PATCH 632/989] gh-135532: use `_Py_strhex` in HACL-MD5's `hexdigest` (#135742) --- Modules/md5module.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 08dbcd2cbce844..6ec0ee87a2f761 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -21,6 +21,8 @@ #endif #include "Python.h" +#include "pycore_strhex.h" // _Py_strhex() + #include "hashlib.h" /*[clinic input] @@ -136,7 +138,7 @@ static PyObject * MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { - unsigned char digest[MD5_DIGESTSIZE]; + uint8_t digest[MD5_DIGESTSIZE]; ENTER_HASHLIB(self); Hacl_Hash_MD5_digest(self->hash_state, digest); LEAVE_HASHLIB(self); @@ -153,20 +155,11 @@ static PyObject * MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { - unsigned char digest[MD5_DIGESTSIZE]; + uint8_t digest[MD5_DIGESTSIZE]; ENTER_HASHLIB(self); Hacl_Hash_MD5_digest(self->hash_state, digest); LEAVE_HASHLIB(self); - - const char *hexdigits = "0123456789abcdef"; - char digest_hex[MD5_DIGESTSIZE * 2]; - char *str = digest_hex; - for (size_t i=0; i < MD5_DIGESTSIZE; i++) { - unsigned char byte = digest[i]; - *str++ = hexdigits[byte >> 4]; - *str++ = hexdigits[byte & 0x0f]; - } - return PyUnicode_FromStringAndSize(digest_hex, sizeof(digest_hex)); + return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } static void From b881e3db1eb501980c43161d7361d8892c94ac81 Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Fri, 20 Jun 2025 19:03:41 +0800 Subject: [PATCH 633/989] gh-89083: add links to RFC 9562 sections in UUID docs (#135684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We also sync the docs for UUIDv1 and UUIDv6 concerning the node address and clock sequence. --------- Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/uuid.rst | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 92d58024e84d4e..6698e6d3f43c43 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -193,43 +193,52 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid1(node=None, clock_seq=None) - Generate a UUID from a host ID, sequence number, and the current time. If *node* - is not given, :func:`getnode` is used to obtain the hardware address. If - *clock_seq* is given, it is used as the sequence number; otherwise a random - 14-bit sequence number is chosen. + Generate a UUID from a host ID, sequence number, and the current time + according to :rfc:`RFC 9562, §5.1 <9562#section-5.1>`. + + When *node* is not specified, :func:`getnode` is used to obtain the hardware + address as a 48-bit positive integer. When a sequence number *clock_seq* is + not specified, a pseudo-random 14-bit positive integer is generated. + + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. function:: uuid3(namespace, name) Generate a UUID based on the MD5 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.3 <9562#section-5.3>`. .. function:: uuid4() - Generate a random UUID. + Generate a random UUID in a cryptographically-secure method + according to :rfc:`RFC 9562, §5.4 <9562#section-5.4>`. .. function:: uuid5(namespace, name) Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.5 <9562#section-5.5>`. .. function:: uuid6(node=None, clock_seq=None) Generate a UUID from a sequence number and the current time according to - :rfc:`9562`. + :rfc:`RFC 9562, §5.6 <9562#section-5.6>`. + This is an alternative to :func:`uuid1` to improve database locality. When *node* is not specified, :func:`getnode` is used to obtain the hardware address as a 48-bit positive integer. When a sequence number *clock_seq* is not specified, a pseudo-random 14-bit positive integer is generated. - If *node* or *clock_seq* exceed their expected bit count, only their least - significant bits are kept. + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. versionadded:: 3.14 From c825b5d989d2e796b48e10230447c616e19c3d67 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 20 Jun 2025 17:32:37 +0530 Subject: [PATCH 634/989] gh-135748: use argument clinic for more socket methods (#135749) --- Modules/clinic/socketmodule.c.h | 172 +++++++++++++++++++++++++++++++- Modules/socketmodule.c | 148 +++++++++++++-------------- 2 files changed, 242 insertions(+), 78 deletions(-) diff --git a/Modules/clinic/socketmodule.c.h b/Modules/clinic/socketmodule.c.h index 573903be87e6ea..0cedab597db714 100644 --- a/Modules/clinic/socketmodule.c.h +++ b/Modules/clinic/socketmodule.c.h @@ -7,7 +7,7 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_long.h" // _PyLong_UInt16_Converter() -#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_socket_socket_close__doc__, "close($self, /)\n" @@ -29,6 +29,170 @@ _socket_socket_close(PyObject *s, PyObject *Py_UNUSED(ignored)) return _socket_socket_close_impl((PySocketSockObject *)s); } +PyDoc_STRVAR(_socket_socket_send__doc__, +"send($self, data, flags=0, /)\n" +"--\n" +"\n" +"Send a data string to the socket.\n" +"\n" +"For the optional flags argument, see the Unix manual.\n" +"Return the number of bytes sent; this may be less than len(data) if the network is busy."); + +#define _SOCKET_SOCKET_SEND_METHODDEF \ + {"send", _PyCFunction_CAST(_socket_socket_send), METH_FASTCALL, _socket_socket_send__doc__}, + +static PyObject * +_socket_socket_send_impl(PySocketSockObject *s, Py_buffer *pbuf, int flags); + +static PyObject * +_socket_socket_send(PyObject *s, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer pbuf = {NULL, NULL}; + int flags = 0; + + if (!_PyArg_CheckPositional("send", nargs, 1, 2)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &pbuf, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + flags = PyLong_AsInt(args[1]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + return_value = _socket_socket_send_impl((PySocketSockObject *)s, &pbuf, flags); + +exit: + /* Cleanup for pbuf */ + if (pbuf.obj) { + PyBuffer_Release(&pbuf); + } + + return return_value; +} + +PyDoc_STRVAR(_socket_socket_sendall__doc__, +"sendall($self, data, flags=0, /)\n" +"--\n" +"\n" +"Send a data string to the socket.\n" +"\n" +"For the optional flags argument, see the Unix manual.\n" +"This calls send() repeatedly until all data is sent.\n" +"If an error occurs, it\'s impossible to tell how much data has been sent."); + +#define _SOCKET_SOCKET_SENDALL_METHODDEF \ + {"sendall", _PyCFunction_CAST(_socket_socket_sendall), METH_FASTCALL, _socket_socket_sendall__doc__}, + +static PyObject * +_socket_socket_sendall_impl(PySocketSockObject *s, Py_buffer *pbuf, + int flags); + +static PyObject * +_socket_socket_sendall(PyObject *s, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer pbuf = {NULL, NULL}; + int flags = 0; + + if (!_PyArg_CheckPositional("sendall", nargs, 1, 2)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &pbuf, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + flags = PyLong_AsInt(args[1]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + return_value = _socket_socket_sendall_impl((PySocketSockObject *)s, &pbuf, flags); + +exit: + /* Cleanup for pbuf */ + if (pbuf.obj) { + PyBuffer_Release(&pbuf); + } + + return return_value; +} + +#if defined(CMSG_LEN) + +PyDoc_STRVAR(_socket_socket_sendmsg__doc__, +"sendmsg($self, buffers, ancdata=, flags=0,\n" +" address=, /)\n" +"--\n" +"\n" +"Send normal and ancillary data to the socket.\n" +"\n" +"It gathering the non-ancillary data from a series of buffers\n" +"and concatenating it into a single message.\n" +"The buffers argument specifies the non-ancillary\n" +"data as an iterable of bytes-like objects (e.g. bytes objects).\n" +"The ancdata argument specifies the ancillary data (control messages)\n" +"as an iterable of zero or more tuples (cmsg_level, cmsg_type,\n" +"cmsg_data), where cmsg_level and cmsg_type are integers specifying the\n" +"protocol level and protocol-specific type respectively, and cmsg_data\n" +"is a bytes-like object holding the associated data. The flags\n" +"argument defaults to 0 and has the same meaning as for send(). If\n" +"address is supplied and not None, it sets a destination address for\n" +"the message. The return value is the number of bytes of non-ancillary\n" +"data sent."); + +#define _SOCKET_SOCKET_SENDMSG_METHODDEF \ + {"sendmsg", _PyCFunction_CAST(_socket_socket_sendmsg), METH_FASTCALL, _socket_socket_sendmsg__doc__}, + +static PyObject * +_socket_socket_sendmsg_impl(PySocketSockObject *s, PyObject *data_arg, + PyObject *cmsg_arg, int flags, + PyObject *addr_arg); + +static PyObject * +_socket_socket_sendmsg(PyObject *s, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *data_arg; + PyObject *cmsg_arg = NULL; + int flags = 0; + PyObject *addr_arg = NULL; + + if (!_PyArg_CheckPositional("sendmsg", nargs, 1, 4)) { + goto exit; + } + data_arg = args[0]; + if (nargs < 2) { + goto skip_optional; + } + cmsg_arg = args[1]; + if (nargs < 3) { + goto skip_optional; + } + flags = PyLong_AsInt(args[2]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } + if (nargs < 4) { + goto skip_optional; + } + addr_arg = args[3]; +skip_optional: + return_value = _socket_socket_sendmsg_impl((PySocketSockObject *)s, data_arg, cmsg_arg, flags, addr_arg); + +exit: + return return_value; +} + +#endif /* defined(CMSG_LEN) */ + static int sock_initobj_impl(PySocketSockObject *self, int family, int type, int proto, PyObject *fdobj); @@ -359,6 +523,10 @@ _socket_if_indextoname(PyObject *module, PyObject *arg) #endif /* (defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS)) */ +#ifndef _SOCKET_SOCKET_SENDMSG_METHODDEF + #define _SOCKET_SOCKET_SENDMSG_METHODDEF +#endif /* !defined(_SOCKET_SOCKET_SENDMSG_METHODDEF) */ + #ifndef _SOCKET_INET_NTOA_METHODDEF #define _SOCKET_INET_NTOA_METHODDEF #endif /* !defined(_SOCKET_INET_NTOA_METHODDEF) */ @@ -370,4 +538,4 @@ _socket_if_indextoname(PyObject *module, PyObject *arg) #ifndef _SOCKET_IF_INDEXTONAME_METHODDEF #define _SOCKET_IF_INDEXTONAME_METHODDEF #endif /* !defined(_SOCKET_IF_INDEXTONAME_METHODDEF) */ -/*[clinic end generated code: output=07776dd21d1e3b56 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0376c46b76ae2bce input=a9049054013a1b77]*/ diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 85c72779bac607..f3ad01854de93b 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -4592,55 +4592,62 @@ sock_send_impl(PySocketSockObject *s, void *data) return (ctx->result >= 0); } -/* s.send(data [,flags]) method */ +/*[clinic input] +_socket.socket.send + self as s: self(type="PySocketSockObject *") + data as pbuf: Py_buffer + flags: int = 0 + / + +Send a data string to the socket. + +For the optional flags argument, see the Unix manual. +Return the number of bytes sent; this may be less than len(data) if the network is busy. +[clinic start generated code]*/ static PyObject * -sock_send(PyObject *self, PyObject *args) -{ - PySocketSockObject *s = _PySocketSockObject_CAST(self); +_socket_socket_send_impl(PySocketSockObject *s, Py_buffer *pbuf, int flags) +/*[clinic end generated code: output=3ddf83f17d0c875b input=befe7d7790ccb035]*/ - int flags = 0; - Py_buffer pbuf; +{ struct sock_send ctx; - if (!PyArg_ParseTuple(args, "y*|i:send", &pbuf, &flags)) - return NULL; - if (!IS_SELECTABLE(s)) { - PyBuffer_Release(&pbuf); return select_error(); } - ctx.buf = pbuf.buf; - ctx.len = pbuf.len; + ctx.buf = pbuf->buf; + ctx.len = pbuf->len; ctx.flags = flags; if (sock_call(s, 1, sock_send_impl, &ctx) < 0) { - PyBuffer_Release(&pbuf); return NULL; } - PyBuffer_Release(&pbuf); return PyLong_FromSsize_t(ctx.result); } -PyDoc_STRVAR(send_doc, -"send(data[, flags]) -> count\n\ -\n\ -Send a data string to the socket. For the optional flags\n\ -argument, see the Unix manual. Return the number of bytes\n\ -sent; this may be less than len(data) if the network is busy."); +/*[clinic input] +_socket.socket.sendall + self as s: self(type="PySocketSockObject *") + data as pbuf: Py_buffer + flags: int = 0 + / -/* s.sendall(data [,flags]) method */ +Send a data string to the socket. + +For the optional flags argument, see the Unix manual. +This calls send() repeatedly until all data is sent. +If an error occurs, it's impossible to tell how much data has been sent. +[clinic start generated code]*/ static PyObject * -sock_sendall(PyObject *self, PyObject *args) -{ - PySocketSockObject *s = _PySocketSockObject_CAST(self); +_socket_socket_sendall_impl(PySocketSockObject *s, Py_buffer *pbuf, + int flags) +/*[clinic end generated code: output=ec92861424d3faa8 input=732b15b9ca64dce6]*/ +{ char *buf; Py_ssize_t len, n; - int flags = 0; - Py_buffer pbuf; struct sock_send ctx; int has_timeout = (s->sock_timeout > 0); PyTime_t timeout = s->sock_timeout; @@ -4648,13 +4655,10 @@ sock_sendall(PyObject *self, PyObject *args) int deadline_initialized = 0; PyObject *res = NULL; - if (!PyArg_ParseTuple(args, "y*|i:sendall", &pbuf, &flags)) - return NULL; - buf = pbuf.buf; - len = pbuf.len; + buf = pbuf->buf; + len = pbuf->len; if (!IS_SELECTABLE(s)) { - PyBuffer_Release(&pbuf); return select_error(); } @@ -4692,23 +4696,13 @@ sock_sendall(PyObject *self, PyObject *args) if (PyErr_CheckSignals()) goto done; } while (len > 0); - PyBuffer_Release(&pbuf); res = Py_NewRef(Py_None); done: - PyBuffer_Release(&pbuf); return res; } -PyDoc_STRVAR(sendall_doc, -"sendall(data[, flags])\n\ -\n\ -Send a data string to the socket. For the optional flags\n\ -argument, see the Unix manual. This calls send() repeatedly\n\ -until all data is sent. If an error occurs, it's impossible\n\ -to tell how much data has been sent."); - #ifdef HAVE_SENDTO struct sock_sendto { @@ -4858,10 +4852,8 @@ sock_sendmsg_iovec(PySocketSockObject *s, PyObject *data_arg, } } for (; ndatabufs < ndataparts; ndatabufs++) { - if (!PyArg_Parse(PySequence_Fast_GET_ITEM(data_fast, ndatabufs), - "y*;sendmsg() argument 1 must be an iterable of " - "bytes-like objects", - &databufs[ndatabufs])) + if (PyObject_GetBuffer(PySequence_Fast_GET_ITEM(data_fast, ndatabufs), + &databufs[ndatabufs], PyBUF_SIMPLE) < 0) goto finally; iovs[ndatabufs].iov_base = databufs[ndatabufs].buf; iovs[ndatabufs].iov_len = databufs[ndatabufs].len; @@ -4883,13 +4875,39 @@ sock_sendmsg_impl(PySocketSockObject *s, void *data) return (ctx->result >= 0); } -/* s.sendmsg(buffers[, ancdata[, flags[, address]]]) method */ +/*[clinic input] +_socket.socket.sendmsg + self as s: self(type="PySocketSockObject *") + buffers as data_arg: object + ancdata as cmsg_arg: object = NULL + flags: int = 0 + address as addr_arg: object = NULL + / + +Send normal and ancillary data to the socket. + +It gathering the non-ancillary data from a series of buffers +and concatenating it into a single message. +The buffers argument specifies the non-ancillary +data as an iterable of bytes-like objects (e.g. bytes objects). +The ancdata argument specifies the ancillary data (control messages) +as an iterable of zero or more tuples (cmsg_level, cmsg_type, +cmsg_data), where cmsg_level and cmsg_type are integers specifying the +protocol level and protocol-specific type respectively, and cmsg_data +is a bytes-like object holding the associated data. The flags +argument defaults to 0 and has the same meaning as for send(). If +address is supplied and not None, it sets a destination address for +the message. The return value is the number of bytes of non-ancillary +data sent. +[clinic start generated code]*/ static PyObject * -sock_sendmsg(PyObject *self, PyObject *args) -{ - PySocketSockObject *s = _PySocketSockObject_CAST(self); +_socket_socket_sendmsg_impl(PySocketSockObject *s, PyObject *data_arg, + PyObject *cmsg_arg, int flags, + PyObject *addr_arg) +/*[clinic end generated code: output=3b4cb1110644ce39 input=479c13d90bd2f88b]*/ +{ Py_ssize_t i, ndatabufs = 0, ncmsgs, ncmsgbufs = 0; Py_buffer *databufs = NULL; sock_addr_t addrbuf; @@ -4901,16 +4919,10 @@ sock_sendmsg(PyObject *self, PyObject *args) } *cmsgs = NULL; void *controlbuf = NULL; size_t controllen, controllen_last; - int addrlen, flags = 0; - PyObject *data_arg, *cmsg_arg = NULL, *addr_arg = NULL, - *cmsg_fast = NULL, *retval = NULL; + int addrlen; + PyObject *cmsg_fast = NULL, *retval = NULL; struct sock_sendmsg ctx; - if (!PyArg_ParseTuple(args, "O|OiO:sendmsg", - &data_arg, &cmsg_arg, &flags, &addr_arg)) { - return NULL; - } - memset(&msg, 0, sizeof(msg)); /* Parse destination address. */ @@ -5072,22 +5084,6 @@ sock_sendmsg(PyObject *self, PyObject *args) return retval; } -PyDoc_STRVAR(sendmsg_doc, -"sendmsg(buffers[, ancdata[, flags[, address]]]) -> count\n\ -\n\ -Send normal and ancillary data to the socket, gathering the\n\ -non-ancillary data from a series of buffers and concatenating it into\n\ -a single message. The buffers argument specifies the non-ancillary\n\ -data as an iterable of bytes-like objects (e.g. bytes objects).\n\ -The ancdata argument specifies the ancillary data (control messages)\n\ -as an iterable of zero or more tuples (cmsg_level, cmsg_type,\n\ -cmsg_data), where cmsg_level and cmsg_type are integers specifying the\n\ -protocol level and protocol-specific type respectively, and cmsg_data\n\ -is a bytes-like object holding the associated data. The flags\n\ -argument defaults to 0 and has the same meaning as for send(). If\n\ -address is supplied and not None, it sets a destination address for\n\ -the message. The return value is the number of bytes of non-ancillary\n\ -data sent."); #endif /* CMSG_LEN */ #ifdef HAVE_SOCKADDR_ALG @@ -5424,8 +5420,8 @@ static PyMethodDef sock_methods[] = { recvfrom_into_doc }, #endif - {"send", sock_send, METH_VARARGS, send_doc}, - {"sendall", sock_sendall, METH_VARARGS, sendall_doc}, + _SOCKET_SOCKET_SEND_METHODDEF + _SOCKET_SOCKET_SENDALL_METHODDEF #ifdef HAVE_SENDTO {"sendto", sock_sendto, METH_VARARGS, sendto_doc}, #endif @@ -5445,7 +5441,7 @@ static PyMethodDef sock_methods[] = { #ifdef CMSG_LEN {"recvmsg", sock_recvmsg, METH_VARARGS, recvmsg_doc}, {"recvmsg_into", sock_recvmsg_into, METH_VARARGS, recvmsg_into_doc}, - {"sendmsg", sock_sendmsg, METH_VARARGS, sendmsg_doc}, + _SOCKET_SOCKET_SENDMSG_METHODDEF #endif #ifdef HAVE_SOCKADDR_ALG { From 59963e866a1bb8128a50cd53d1b13eeab03df06e Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Fri, 20 Jun 2025 22:57:04 +0900 Subject: [PATCH 635/989] Docs: Document `PyExceptionClass` functions in the C API (GH-135697) * Docs: Document `PyExceptionClass_Name` `PyExceptionClass_Name` is an undocumented function in the limited API. * Document `PyExceptionClass_Check` --- Doc/c-api/exceptions.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 885dbeb75303d1..a750cda3e2d474 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -749,6 +749,16 @@ Exception Classes .. versionadded:: 3.2 +.. c:function:: int PyExceptionClass_Check(PyObject *ob) + + Return non-zero if *ob* is an exception class, zero otherwise. This function always succeeds. + + +.. c:function:: const char *PyExceptionClass_Name(PyObject *ob) + + Return :c:member:`~PyTypeObject.tp_name` of the exception class *ob*. + + Exception Objects ================= From 61532b4bc783c1570c0fd7fb2ff5f8157cf64e82 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Fri, 20 Jun 2025 18:21:39 +0200 Subject: [PATCH 636/989] gh-131798: JIT: Optimize _CALL_LEN when the length is known (#135260) * Add news entry * Optimize _CALL_LEN * Simplify tests --- Lib/test/test_capi/test_opt.py | 43 +++++++++++++++++++ ...-06-08-14-24-29.gh-issue-131798.qfw91T.rst | 1 + Python/optimizer_bytecodes.c | 15 ++++++- Python/optimizer_cases.c.h | 19 ++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b6934d747ebe0..84e864b44b9544 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1968,6 +1968,49 @@ def testfunc(n): self.assertNotIn("_GUARD_NOS_INT", uops) self.assertNotIn("_GUARD_TOS_INT", uops) + def test_call_len_known_length_small_int(self): + def testfunc(n): + x = 0 + for _ in range(n): + t = (1, 2, 3, 4, 5) + if len(t) == 5: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # When the length is < _PY_NSMALLPOSINTS, the len() call is replaced + # with just an inline load. + self.assertNotIn("_CALL_LEN", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + + def test_call_len_known_length(self): + def testfunc(n): + class C: + t = tuple(range(300)) + + x = 0 + for _ in range(n): + if len(C.t) == 300: # comparison + guard removed + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # When the length is >= _PY_NSMALLPOSINTS, we cannot replace + # the len() call with an inline load, but knowing the exact + # length allows us to optimize more code, such as conditionals + # in this case + self.assertIn("_CALL_LEN", uops) + self.assertNotIn("_COMPARE_OP_INT", uops) + self.assertNotIn("_GUARD_IS_TRUE_POP", uops) + def test_get_len_with_const_tuple(self): def testfunc(n): x = 0.0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst new file mode 100644 index 00000000000000..7965169d46e820 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst @@ -0,0 +1 @@ +Optimize _CALL_LEN in the JIT when the length is known. Patch by Tomas Roun diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 2e240830013a46..3f2e2e0351e052 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1203,8 +1203,21 @@ dummy_func(void) { sym_set_const(callable, (PyObject *)&PyUnicode_Type); } - op(_CALL_LEN, (unused, unused, unused -- res)) { + op(_CALL_LEN, (callable, null, arg -- res)) { res = sym_new_type(ctx, &PyLong_Type); + int tuple_length = sym_tuple_length(arg); + if (tuple_length >= 0) { + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + 0, (uintptr_t)temp); + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + } } op(_GET_LEN, (obj -- obj, len)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3cabf619fe81e7..91927180b3509d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2241,8 +2241,27 @@ } case _CALL_LEN: { + JitOptRef arg; JitOptRef res; + arg = stack_pointer[-1]; res = sym_new_type(ctx, &PyLong_Type); + int tuple_length = sym_tuple_length(arg); + if (tuple_length >= 0) { + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + 0, (uintptr_t)temp); + } + res = sym_new_const(ctx, temp); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + Py_DECREF(temp); + stack_pointer += 2; + } stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); From 3fb6cfe7a95081e6775ad2dca845713a3ea4c799 Mon Sep 17 00:00:00 2001 From: Pastukhov Nikita Date: Fri, 20 Jun 2025 20:06:01 +0300 Subject: [PATCH 637/989] gh-135721: skip `test_trashcan_python_class` on wasm buildbots with stack overflow (#135766) --- Lib/test/test_capi/test_misc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index f74694a7a745a4..ef950f5df04ad3 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -413,11 +413,13 @@ def test_trashcan_subclass(self): @support.requires_resource('cpu') @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_trashcan_python_class1(self): self.do_test_trashcan_python_class(list) @support.requires_resource('cpu') @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_trashcan_python_class2(self): from _testcapi import MyList self.do_test_trashcan_python_class(MyList) From a8ec511900d0d84cffbb4ee6419c9a790d131129 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 20 Jun 2025 22:43:23 +0530 Subject: [PATCH 638/989] gh-135380: enhance critical section held assertions (#135381) --- Include/internal/pycore_critical_section.h | 24 +++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 42f06b935bd0a0..62460c5f8fad30 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -64,7 +64,7 @@ extern "C" { # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \ if (Py_REFCNT(op) != 1) { \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \ + _PyCriticalSection_AssertHeldObj(_PyObject_CAST(op)); \ } #else /* Py_DEBUG */ @@ -239,6 +239,28 @@ _PyCriticalSection_AssertHeld(PyMutex *mutex) #endif } +static inline void +_PyCriticalSection_AssertHeldObj(PyObject *op) +{ +#ifdef Py_DEBUG + PyMutex *mutex = &_PyObject_CAST(op)->ob_mutex; + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t prev = tstate->critical_section; + if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) { + PyCriticalSection2 *cs = (PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK); + _PyObject_ASSERT_WITH_MSG(op, + (cs != NULL && (cs->_cs_base._cs_mutex == mutex || cs->_cs_mutex2 == mutex)), + "Critical section of object is not held"); + } + else { + PyCriticalSection *cs = (PyCriticalSection *)(prev & ~_Py_CRITICAL_SECTION_MASK); + _PyObject_ASSERT_WITH_MSG(op, + (cs != NULL && cs->_cs_mutex == mutex), + "Critical section of object is not held"); + } + +#endif +} #endif /* Py_GIL_DISABLED */ #ifdef __cplusplus From c5ea8e8e8fc725f39ed23ff6259b3cc157a0785f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 20 Jun 2025 14:26:32 -0600 Subject: [PATCH 639/989] gh-135698: Fix Cross-interpreter Queue.full() With Negative/Default max_size (gh-135724) We weren't handling non-positive maxsize values (including the default) properly in Queue.full(). This change fixes that and adjusts an associated assert. --- Lib/test/test_interpreters/test_queues.py | 68 +++++++++++++++++++---- Modules/_interpqueuesmodule.c | 7 ++- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index 3e982d76e86314..cb17340f581b0a 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -208,18 +208,64 @@ def test_empty(self): self.assertIs(after, True) def test_full(self): - expected = [False, False, False, True, False, False, False] - actual = [] - queue = queues.create(3) - for _ in range(3): - actual.append(queue.full()) - queue.put(None) - actual.append(queue.full()) - for _ in range(3): - queue.get() - actual.append(queue.full()) + for maxsize in [1, 3, 11]: + with self.subTest(f'maxsize={maxsize}'): + num_to_add = maxsize + expected = [False] * (num_to_add * 2 + 3) + expected[maxsize] = True + expected[maxsize + 1] = True + + queue = queues.create(maxsize) + actual = [] + empty = [queue.empty()] + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.put_nowait(None) + actual.append(queue.full()) + with self.assertRaises(queues.QueueFull): + queue.put_nowait(None) + empty.append(queue.empty()) + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.get_nowait() + actual.append(queue.full()) + with self.assertRaises(queues.QueueEmpty): + queue.get_nowait() + actual.append(queue.full()) + empty.append(queue.empty()) - self.assertEqual(actual, expected) + self.assertEqual(actual, expected) + self.assertEqual(empty, [True, False, True]) + + # no max size + for args in [(), (0,), (-1,), (-10,)]: + with self.subTest(f'maxsize={args[0]}' if args else ''): + num_to_add = 13 + expected = [False] * (num_to_add * 2 + 3) + + queue = queues.create(*args) + actual = [] + empty = [queue.empty()] + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.put_nowait(None) + actual.append(queue.full()) + empty.append(queue.empty()) + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.get_nowait() + actual.append(queue.full()) + with self.assertRaises(queues.QueueEmpty): + queue.get_nowait() + actual.append(queue.full()) + empty.append(queue.empty()) + + self.assertEqual(actual, expected) + self.assertEqual(empty, [True, False, True]) def test_qsize(self): expected = [0, 1, 2, 3, 2, 3, 2, 1, 0, 1, 0] diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index e22709d5119b7c..de06b8b41fe585 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -707,8 +707,11 @@ _queue_is_full(_queue *queue, int *p_is_full) return err; } - assert(queue->items.count <= queue->items.maxsize); - *p_is_full = queue->items.count == queue->items.maxsize; + assert(queue->items.maxsize <= 0 + || queue->items.count <= queue->items.maxsize); + *p_is_full = queue->items.maxsize > 0 + ? queue->items.count == queue->items.maxsize + : 0; _queue_unlock(queue); return 0; From 4ddf505d9982dc8afead8f52f5754eea5ebde623 Mon Sep 17 00:00:00 2001 From: Marcell Perger <102254594+MarcellPerger1@users.noreply.github.com> Date: Fri, 20 Jun 2025 23:45:36 +0100 Subject: [PATCH 640/989] gh-135756: Fix nonexistent parameter in tkinter docs (#135770) Remove nonexistent color parameter from tkinter.commondialog.Dialog.show() method documentation. --- Doc/library/dialog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/dialog.rst b/Doc/library/dialog.rst index 191e0da12103fa..e0693e8eb6ed22 100644 --- a/Doc/library/dialog.rst +++ b/Doc/library/dialog.rst @@ -220,7 +220,7 @@ is the base class for dialogs defined in other supporting modules. .. class:: Dialog(master=None, **options) - .. method:: show(color=None, **options) + .. method:: show(**options) Render the Dialog window. From f4911258a80409cb641f13578137475204ab43b5 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Fri, 20 Jun 2025 23:06:59 -0700 Subject: [PATCH 641/989] gh-127146: Skip test_os.test_mode for Emscripten (#135764) Temporarily skip test_os.test_mode on Emscripten; this fails consistently on the buildbot, but not on other test configurations. Reported as #135783 for follow up. --- Lib/test/test_os.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 8f56b4559f46c6..5217037ae9d812 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1918,6 +1918,10 @@ def test_makedir(self): support.is_wasi, "WASI's umask is a stub." ) + @unittest.skipIf( + support.is_emscripten, + "TODO: Fails in buildbot; see #135783" + ) def test_mode(self): with os_helper.temp_umask(0o002): base = os_helper.TESTFN From 8ca1e4d846e868a20834cf442c48a3648b558bbe Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 21 Jun 2025 10:56:14 +0300 Subject: [PATCH 642/989] gh-135645: Added `supports_isolated_interpreters` to `sys.implementation` (#135667) Co-authored-by: Eric Snow --- Doc/library/sys.rst | 12 ++++++++++++ Lib/test/test_sys.py | 10 ++++++++++ .../2025-06-18-13-58-13.gh-issue-135645.109nff.rst | 2 ++ Python/sysmodule.c | 12 ++++++++++++ 4 files changed, 36 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index f8f727f4a23410..1626a89a0731bf 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1185,6 +1185,15 @@ always available. Unless explicitly noted otherwise, all variables are read-only ``cache_tag`` is set to ``None``, it indicates that module caching should be disabled. + *supports_isolated_interpreters* is a boolean value, whether + this implementation supports multiple isolated interpreters. + It is ``True`` for CPython on most platforms. Platforms with + this support implement the low-level :mod:`!_interpreters` module. + + .. seealso:: + + :pep:`684`, :pep:`734`, and :mod:`concurrent.interpreters`. + :data:`sys.implementation` may contain additional attributes specific to the Python implementation. These non-standard attributes must start with an underscore, and are not described here. Regardless of its contents, @@ -1194,6 +1203,9 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. versionadded:: 3.3 + .. versionchanged:: 3.14 + Added ``supports_isolated_interpreters`` field. + .. note:: The addition of new required attributes must go through the normal PEP diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 73a72024bba84d..27524d86355b9c 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1074,6 +1074,7 @@ def test_implementation(self): self.assertHasAttr(sys.implementation, 'version') self.assertHasAttr(sys.implementation, 'hexversion') self.assertHasAttr(sys.implementation, 'cache_tag') + self.assertHasAttr(sys.implementation, 'supports_isolated_interpreters') version = sys.implementation.version self.assertEqual(version[:2], (version.major, version.minor)) @@ -1087,6 +1088,15 @@ def test_implementation(self): self.assertEqual(sys.implementation.name, sys.implementation.name.lower()) + # https://peps.python.org/pep-0734 + sii = sys.implementation.supports_isolated_interpreters + self.assertIsInstance(sii, bool) + if test.support.check_impl_detail(cpython=True): + if test.support.is_emscripten or test.support.is_wasi: + self.assertFalse(sii) + else: + self.assertTrue(sii) + @test.support.cpython_only def test_debugmallocstats(self): # Test sys._debugmallocstats() diff --git a/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst b/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst new file mode 100644 index 00000000000000..a7764a0105b8b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst @@ -0,0 +1,2 @@ +Added ``supports_isolated_interpreters`` field to +:data:`sys.implementation`. diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b3a2512a99d2fc..ae6cf306735939 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -3607,6 +3607,18 @@ make_impl_info(PyObject *version_info) goto error; #endif + // PEP-734 +#if defined(__wasi__) || defined(__EMSCRIPTEN__) + // It is not enabled on WASM builds just yet + value = Py_False; +#else + value = Py_True; +#endif + res = PyDict_SetItemString(impl_info, "supports_isolated_interpreters", value); + if (res < 0) { + goto error; + } + /* dict ready */ ns = _PyNamespace_New(impl_info); From 13cac833471885564cbfde72a4cbac64ade3137a Mon Sep 17 00:00:00 2001 From: Xuanteng Huang <44627253+xuantengh@users.noreply.github.com> Date: Sat, 21 Jun 2025 16:43:15 +0800 Subject: [PATCH 643/989] gh-135557: use atomic stores in `heapq` operations in free-threading (#135601) --- Lib/test/test_free_threading/test_heapq.py | 29 ++++++++++++++- ...-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst | 2 + Modules/_heapqmodule.c | 37 +++++++++++-------- 3 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst diff --git a/Lib/test/test_free_threading/test_heapq.py b/Lib/test/test_free_threading/test_heapq.py index f75fb264c8ac0f..ee7adfb2b78d83 100644 --- a/Lib/test/test_free_threading/test_heapq.py +++ b/Lib/test/test_free_threading/test_heapq.py @@ -3,7 +3,7 @@ import heapq from enum import Enum -from threading import Thread, Barrier +from threading import Thread, Barrier, Lock from random import shuffle, randint from test.support import threading_helper @@ -178,6 +178,33 @@ def heapreplace_max_func(max_heap, replace_items): self.assertEqual(len(max_heap), OBJECT_COUNT) self.test_heapq.check_max_invariant(max_heap) + def test_lock_free_list_read(self): + n, n_threads = 1_000, 10 + l = [] + barrier = Barrier(n_threads * 2) + + count = 0 + lock = Lock() + + def worker(): + with lock: + nonlocal count + x = count + count += 1 + + barrier.wait() + for i in range(n): + if x % 2: + heapq.heappush(l, 1) + heapq.heappop(l) + else: + try: + l[0] + except IndexError: + pass + + self.run_concurrently(worker, (), n_threads * 2) + @staticmethod def is_sorted_ascending(lst): """ diff --git a/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst b/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst new file mode 100644 index 00000000000000..eabf5ea4aaa2ac --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst @@ -0,0 +1,2 @@ +Fix races on :mod:`heapq` updates and :class:`list` reads on the :term:`free threaded ` +build. diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 7784cdcd9ffa24..560fe431fcac99 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -12,6 +12,7 @@ annotated by François Pinard, and converted to C by Raymond Hettinger. #include "Python.h" #include "pycore_list.h" // _PyList_ITEMS(), _PyList_AppendTakeRef() +#include "pycore_pyatomic_ft_wrappers.h" #include "clinic/_heapqmodule.c.h" @@ -59,8 +60,8 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -108,8 +109,8 @@ siftup(PyListObject *heap, Py_ssize_t pos) /* Move the smaller child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -172,8 +173,9 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) if (!n) return lastelt; returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, lastelt); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], lastelt); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -208,8 +210,9 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -284,8 +287,9 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -437,8 +441,8 @@ siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -486,8 +490,8 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) /* Move the smaller child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -621,8 +625,9 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_max((PyListObject *)heap, 0) < 0) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_max(list, 0) < 0) { Py_DECREF(returnitem); return NULL; } From 7c4361564c3881946a5eca677607b4ffec0a566d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Jun 2025 11:43:30 +0200 Subject: [PATCH 644/989] gh-135759: consistently reject negative sizes in SHAKE digests (#135767) Passing a negative digest length to `_hashilb.HASHXOF.[hex]digest()` now raises a ValueError instead of a MemoryError or a SystemError. This makes the behavior consistent with that of `_sha3.shake_{128,256}.[hex]digest`. --- Lib/test/test_hashlib.py | 64 ++++++++++++++----- ...-06-20-16-28-47.gh-issue-135759.jne0Zi.rst | 4 ++ Modules/_hashopenssl.c | 15 ++++- Modules/clinic/sha3module.c.h | 38 ++++++++--- Modules/sha3module.c | 39 +++++++---- 5 files changed, 120 insertions(+), 40 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index b83ae181718b7a..e77343f827811a 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -207,6 +207,11 @@ def hash_constructors(self): constructors = self.constructors_to_test.values() return itertools.chain.from_iterable(constructors) + @property + def shake_constructors(self): + for shake_name in self.shakes: + yield from self.constructors_to_test.get(shake_name, ()) + @property def is_fips_mode(self): return get_fips_mode() @@ -376,21 +381,50 @@ def test_hexdigest(self): self.assertIsInstance(h.digest(), bytes) self.assertEqual(hexstr(h.digest()), h.hexdigest()) - def test_digest_length_overflow(self): - # See issue #34922 - large_sizes = (2**29, 2**32-10, 2**32+10, 2**61, 2**64-10, 2**64+10) - for cons in self.hash_constructors: - h = cons(usedforsecurity=False) - if h.name not in self.shakes: - continue - if HASH is not None and isinstance(h, HASH): - # _hashopenssl's take a size_t - continue - for digest in h.digest, h.hexdigest: - self.assertRaises(ValueError, digest, -10) - for length in large_sizes: - with self.assertRaises((ValueError, OverflowError)): - digest(length) + def test_shakes_zero_digest_length(self): + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(b'abcdef', usedforsecurity=False) + self.assertEqual(h.digest(0), b'') + self.assertEqual(h.hexdigest(0), '') + + def test_shakes_invalid_digest_length(self): + # See https://github.com/python/cpython/issues/79103. + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(usedforsecurity=False) + # Note: digest() and hexdigest() take a signed input and + # raise if it is negative; the rationale is that we use + # internally PyBytes_FromStringAndSize() and _Py_strhex() + # which both take a Py_ssize_t. + for negative_size in (-1, -10, -(1 << 31), -sys.maxsize): + self.assertRaises(ValueError, h.digest, negative_size) + self.assertRaises(ValueError, h.hexdigest, negative_size) + + def test_shakes_overflow_digest_length(self): + # See https://github.com/python/cpython/issues/135759. + + exc_types = (OverflowError, ValueError) + # HACL* accepts an 'uint32_t' while OpenSSL accepts a 'size_t'. + openssl_overflown_sizes = (sys.maxsize + 1, 2 * sys.maxsize) + # https://github.com/python/cpython/issues/79103 restricts + # the accepted built-in lengths to 2 ** 29, even if OpenSSL + # accepts such lengths. + builtin_overflown_sizes = openssl_overflown_sizes + ( + 2 ** 29, 2 ** 32 - 10, 2 ** 32, 2 ** 32 + 10, + 2 ** 61, 2 ** 64 - 10, 2 ** 64, 2 ** 64 + 10, + ) + + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(usedforsecurity=False) + if HASH is not None and isinstance(h, HASH): + overflown_sizes = openssl_overflown_sizes + else: + overflown_sizes = builtin_overflown_sizes + for invalid_size in overflown_sizes: + self.assertRaises(exc_types, h.digest, invalid_size) + self.assertRaises(exc_types, h.hexdigest, invalid_size) def test_name_attribute(self): for cons in self.hash_constructors: diff --git a/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst new file mode 100644 index 00000000000000..268d7eccdabc7f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst @@ -0,0 +1,4 @@ +:mod:`hashlib`: reject negative digest lengths in OpenSSL-based SHAKE objects +by raising a :exc:`ValueError`. Previously, negative lengths were implicitly +rejected by raising a :exc:`MemoryError` or a :exc:`SystemError`. +Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index ce9603d5db841f..75ddd7c24823f5 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -938,8 +938,14 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) /*[clinic end generated code: output=dcb09335dd2fe908 input=3eb034ce03c55b21]*/ { EVP_MD_CTX *temp_ctx; - PyObject *retval = PyBytes_FromStringAndSize(NULL, length); + PyObject *retval; + + if (length < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + retval = PyBytes_FromStringAndSize(NULL, length); if (retval == NULL) { return NULL; } @@ -986,9 +992,14 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) EVP_MD_CTX *temp_ctx; PyObject *retval; + if (length < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + digest = (unsigned char*)PyMem_Malloc(length); if (digest == NULL) { - PyErr_NoMemory(); + (void)PyErr_NoMemory(); return NULL; } diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index 121be2c0758695..d6d44bf4c514c0 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -6,7 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_long.h" // _PyLong_UnsignedLong_Converter() +#include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_sha3_new__doc__, @@ -179,7 +179,7 @@ PyDoc_STRVAR(_sha3_shake_128_digest__doc__, {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, static PyObject * -_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); +_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length); static PyObject * _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -213,15 +213,24 @@ _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - unsigned long length; + Py_ssize_t length; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + length = ival; } return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length); @@ -239,7 +248,7 @@ PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, static PyObject * -_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); +_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length); static PyObject * _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -273,19 +282,28 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t narg }; #undef KWTUPLE PyObject *argsbuf[1]; - unsigned long length; + Py_ssize_t length; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + length = ival; } return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length); exit: return return_value; } -/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c17e3ec670afe253 input=a9049054013a1b77]*/ diff --git a/Modules/sha3module.c b/Modules/sha3module.c index cfbf0cbcc042c5..3d047e8e0fe42c 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -473,16 +473,25 @@ SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods, SHA3_getseters); SHA3_TYPE_SPEC(sha3_512_spec, "sha3_512", sha3_512_slots); static PyObject * -_SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) +_SHAKE_digest(SHA3object *self, Py_ssize_t digestlen, int hex) { unsigned char *digest = NULL; PyObject *result = NULL; - SHA3object *self = _SHA3object_CAST(op); - if (digestlen >= (1 << 29)) { - PyErr_SetString(PyExc_ValueError, "length is too large"); + if (digestlen < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + if ((size_t)digestlen >= (1 << 29)) { + /* + * Raise OverflowError to match the semantics of OpenSSL SHAKE + * when the digest length exceeds the range of a 'Py_ssize_t'; + * the exception message will however be different in this case. + */ + PyErr_SetString(PyExc_OverflowError, "digest length is too large"); return NULL; } + digest = (unsigned char*)PyMem_Malloc(digestlen); if (digest == NULL) { return PyErr_NoMemory(); @@ -493,7 +502,11 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) * - the output length is zero -- we follow the existing behavior and return * an empty digest, without raising an error */ if (digestlen > 0) { - (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, digestlen); +#if PY_SSIZE_T_MAX > UINT32_MAX + assert(digestlen <= (Py_ssize_t)UINT32_MAX); +#endif + (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, + (uint32_t)digestlen); } if (hex) { result = _Py_strhex((const char *)digest, digestlen); @@ -509,32 +522,32 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) /*[clinic input] _sha3.shake_128.digest - length: unsigned_long + length: Py_ssize_t Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * -_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/ +_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length) +/*[clinic end generated code: output=6c53fb71a6cff0a0 input=be03ade4b31dd54c]*/ { - return _SHAKE_digest((PyObject *)self, length, 0); + return _SHAKE_digest(self, length, 0); } /*[clinic input] _sha3.shake_128.hexdigest - length: unsigned_long + length: Py_ssize_t Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * -_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/ +_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length) +/*[clinic end generated code: output=a27412d404f64512 input=0d84d05d7a8ccd37]*/ { - return _SHAKE_digest((PyObject *)self, length, 1); + return _SHAKE_digest(self, length, 1); } static PyObject * From d08b4b2333d28403633f9ceb86a3a5fab011d8a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Jun 2025 14:32:00 +0200 Subject: [PATCH 645/989] gh-135532: optimize calls to `PyMem_Malloc` in SHAKE digest computation (#135744) - Add a fast path when the digest length is 0 to avoid calling useless functions. - Directly allocate via `PyBytes_FromStringAndSize(NULL, length)` when possible. --- Modules/_hashopenssl.c | 8 ++++ Modules/sha3module.c | 95 ++++++++++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 36 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 75ddd7c24823f5..20eba9dd77bafb 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -945,6 +945,10 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) return NULL; } + if (length == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + retval = PyBytes_FromStringAndSize(NULL, length); if (retval == NULL) { return NULL; @@ -997,6 +1001,10 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) return NULL; } + if (length == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); + } + digest = (unsigned char*)PyMem_Malloc(length); if (digest == NULL) { (void)PyErr_NoMemory(); diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 3d047e8e0fe42c..9946024fe2e848 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -24,6 +24,17 @@ #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "hashlib.h" +/* + * Assert that 'LEN' can be safely casted to uint32_t. + * + * The 'LEN' parameter should be convertible to Py_ssize_t. + */ +#if !defined(NDEBUG) && (PY_SSIZE_T_MAX > UINT32_MAX) +#define CHECK_HACL_UINT32_T_LENGTH(LEN) assert((LEN) < (Py_ssize_t)UINT32_MAX) +#else +#define CHECK_HACL_UINT32_T_LENGTH(LEN) +#endif + #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ typedef struct { @@ -472,50 +483,23 @@ SHA3_TYPE_SPEC(sha3_384_spec, "sha3_384", sha3_384_slots); SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods, SHA3_getseters); SHA3_TYPE_SPEC(sha3_512_spec, "sha3_512", sha3_512_slots); -static PyObject * -_SHAKE_digest(SHA3object *self, Py_ssize_t digestlen, int hex) +static int +sha3_shake_check_digest_length(Py_ssize_t length) { - unsigned char *digest = NULL; - PyObject *result = NULL; - - if (digestlen < 0) { + if (length < 0) { PyErr_SetString(PyExc_ValueError, "negative digest length"); - return NULL; + return -1; } - if ((size_t)digestlen >= (1 << 29)) { + if ((size_t)length >= (1 << 29)) { /* * Raise OverflowError to match the semantics of OpenSSL SHAKE * when the digest length exceeds the range of a 'Py_ssize_t'; * the exception message will however be different in this case. */ PyErr_SetString(PyExc_OverflowError, "digest length is too large"); - return NULL; - } - - digest = (unsigned char*)PyMem_Malloc(digestlen); - if (digest == NULL) { - return PyErr_NoMemory(); - } - - /* Get the raw (binary) digest value. The HACL functions errors out if: - * - the algorithm is not shake -- not the case here - * - the output length is zero -- we follow the existing behavior and return - * an empty digest, without raising an error */ - if (digestlen > 0) { -#if PY_SSIZE_T_MAX > UINT32_MAX - assert(digestlen <= (Py_ssize_t)UINT32_MAX); -#endif - (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, - (uint32_t)digestlen); - } - if (hex) { - result = _Py_strhex((const char *)digest, digestlen); - } - else { - result = PyBytes_FromStringAndSize((const char *)digest, digestlen); + return -1; } - PyMem_Free(digest); - return result; + return 0; } @@ -531,7 +515,26 @@ static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length) /*[clinic end generated code: output=6c53fb71a6cff0a0 input=be03ade4b31dd54c]*/ { - return _SHAKE_digest(self, length, 0); + if (sha3_shake_check_digest_length(length) < 0) { + return NULL; + } + + /* + * Hacl_Hash_SHA3_squeeze() fails if the algorithm is not SHAKE, + * or if the length is 0. In the latter case, we follow OpenSSL's + * behavior and return an empty digest, without raising an error. + */ + if (length == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + + CHECK_HACL_UINT32_T_LENGTH(length); + PyObject *digest = PyBytes_FromStringAndSize(NULL, length); + uint8_t *buffer = (uint8_t *)PyBytes_AS_STRING(digest); + ENTER_HASHLIB(self); + (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); + LEAVE_HASHLIB(self); + return digest; } @@ -547,7 +550,27 @@ static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length) /*[clinic end generated code: output=a27412d404f64512 input=0d84d05d7a8ccd37]*/ { - return _SHAKE_digest(self, length, 1); + if (sha3_shake_check_digest_length(length) < 0) { + return NULL; + } + + /* See _sha3_shake_128_digest_impl() for the fast path rationale. */ + if (length == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_STR); + } + + CHECK_HACL_UINT32_T_LENGTH(length); + uint8_t *buffer = PyMem_Malloc(length); + if (buffer == NULL) { + return PyErr_NoMemory(); + } + + ENTER_HASHLIB(self); + (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); + LEAVE_HASHLIB(self); + PyObject *digest = _Py_strhex((const char *)buffer, length); + PyMem_Free(buffer); + return digest; } static PyObject * From 6a16b3c440cf9ecabecd3e90f44310e3b0765780 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Sat, 21 Jun 2025 10:01:14 -0300 Subject: [PATCH 646/989] Docs: Remove unnecessary trailing backslashes (GH-135781) This fixes Sphinx's gettext extraction for translations. --- Doc/c-api/extension-modules.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst index 4c8212f2f5e7f7..3d331e6ec12f76 100644 --- a/Doc/c-api/extension-modules.rst +++ b/Doc/c-api/extension-modules.rst @@ -242,6 +242,6 @@ in the following ways: * Single-phase modules support module lookup functions like :c:func:`PyState_FindModule`. -.. [#testsinglephase] ``_testsinglephase`` is an internal module used \ - in CPython's self-test suite; your installation may or may not \ +.. [#testsinglephase] ``_testsinglephase`` is an internal module used + in CPython's self-test suite; your installation may or may not include it. From b14986c91464b06e9016678637e41d0e0192bbe8 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sat, 21 Jun 2025 22:03:17 +0530 Subject: [PATCH 647/989] gh-130605: reenable `test_concurrent_futures` tests under TSAN (#135790) --- Lib/test/libregrtest/tsan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index d984a735bdf92f..3545c5f999ff70 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -8,7 +8,7 @@ 'test_capi.test_pyatomic', 'test_code', 'test_ctypes', - # 'test_concurrent_futures', # gh-130605: too many data races + 'test_concurrent_futures', 'test_enum', 'test_functools', 'test_httpservers', From ac9d37c60b7d10cb38fc9e5cb04e398dbc5bf2e5 Mon Sep 17 00:00:00 2001 From: Kattni Date: Sun, 22 Jun 2025 00:51:23 -0400 Subject: [PATCH 648/989] patchcheck: use URL paths to identify upstream remote (GH-135806) * find defined "(fetch)" remotes with "python/cpython" in their URL * if there is exactly one, use that remote name * if there is one named "upstream", "origin", or "python", use that remote (in that precedence order) * otherwise report an error listing the defined remotes --- Misc/ACKS | 1 + Tools/patchcheck/patchcheck.py | 46 ++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/Misc/ACKS b/Misc/ACKS index d4557a03eb5400..74cf29cdbc552f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -943,6 +943,7 @@ Anton Kasyanov Lou Kates Makoto Kato Irit Katriel +Kattni Hiroaki Kawai Dmitry Kazakov Brian Kearns diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py index 0dcf6ef844a048..afd010a52544a3 100755 --- a/Tools/patchcheck/patchcheck.py +++ b/Tools/patchcheck/patchcheck.py @@ -53,19 +53,43 @@ def get_git_branch(): def get_git_upstream_remote(): - """Get the remote name to use for upstream branches + """ + Get the remote name to use for upstream branches - Uses "upstream" if it exists, "origin" otherwise + Check for presence of "https://github.com/python/cpython" remote URL. + If only one is found, return that remote name. If multiple are found, + check for and return "upstream", "origin", or "python", in that + order. Raise an error if no valid matches are found. """ - cmd = "git remote get-url upstream".split() - try: - subprocess.check_output(cmd, - stderr=subprocess.DEVNULL, - cwd=SRCDIR, - encoding='UTF-8') - except subprocess.CalledProcessError: - return "origin" - return "upstream" + cmd = "git remote -v".split() + output = subprocess.check_output( + cmd, + stderr=subprocess.DEVNULL, + cwd=SRCDIR, + encoding="UTF-8" + ) + # Filter to desired remotes, accounting for potential uppercasing + filtered_remotes = { + remote.split("\t")[0].lower() for remote in output.split('\n') + if "python/cpython" in remote.lower() and remote.endswith("(fetch)") + } + if len(filtered_remotes) == 1: + [remote] = filtered_remotes + return remote + for remote_name in ["upstream", "origin", "python"]: + if remote_name in filtered_remotes: + return remote_name + remotes_found = "\n".join( + {remote for remote in output.split('\n') if remote.endswith("(fetch)")} + ) + raise ValueError( + f"Patchcheck was unable to find an unambiguous upstream remote, " + f"with URL matching 'https://github.com/python/cpython'. " + f"For help creating an upstream remote, see Dev Guide: " + f"https://devguide.python.org/getting-started/" + f"git-boot-camp/#cloning-a-forked-cpython-repository " + f"\nRemotes found: \n{remotes_found}" + ) def get_git_remote_default_branch(remote_name): From e7295a89b85df69f5d6a60ed50f78e1d8e636434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 16:59:57 +0200 Subject: [PATCH 649/989] gh-135239: simpler use of mutexes in cryptographic modules (#135267) --- Lib/test/test_hashlib.py | 75 ++++++---- Modules/_hashopenssl.c | 81 ++++------- Modules/blake2module.c | 57 +++----- Modules/hashlib.h | 99 +++++++++----- Modules/hmacmodule.c | 288 +++++++++------------------------------ Modules/md5module.c | 54 +++----- Modules/sha1module.c | 53 +++---- Modules/sha2module.c | 136 +++++++----------- Modules/sha3module.c | 60 +++----- 9 files changed, 322 insertions(+), 581 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index e77343f827811a..6e4e41e4fbb4f1 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1043,49 +1043,67 @@ def test_gil(self): def test_sha256_gil(self): gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib']) + data = b'1' + b'#' * gil_minsize + b'1' + expected = hashlib.sha256(data).hexdigest() + m = hashlib.sha256() m.update(b'1') m.update(b'#' * gil_minsize) m.update(b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) + self.assertEqual(m.hexdigest(), expected) - m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_threaded_hashing_fast(self): + # Same as test_threaded_hashing_slow() but only tests some functions + # since otherwise test_hashlib.py becomes too slow during development. + for name in ['md5', 'sha1', 'sha256', 'sha3_256', 'blake2s']: + if constructor := getattr(hashlib, name, None): + with self.subTest(name): + self.do_test_threaded_hashing(constructor, is_shake=False) + if shake_128 := getattr(hashlib, 'shake_128', None): + self.do_test_threaded_hashing(shake_128, is_shake=True) + @requires_resource('cpu') @threading_helper.reap_threads @threading_helper.requires_working_threading() - def test_threaded_hashing(self): + def test_threaded_hashing_slow(self): + for algorithm, constructors in self.constructors_to_test.items(): + is_shake = algorithm in self.shakes + for constructor in constructors: + with self.subTest(constructor.__name__, is_shake=is_shake): + self.do_test_threaded_hashing(constructor, is_shake) + + def do_test_threaded_hashing(self, constructor, is_shake): # Updating the same hash object from several threads at once # using data chunk sizes containing the same byte sequences. # # If the internal locks are working to prevent multiple # updates on the same object from running at once, the resulting # hash will be the same as doing it single threaded upfront. - hasher = hashlib.sha1() - num_threads = 5 - smallest_data = b'swineflu' - data = smallest_data * 200000 - expected_hash = hashlib.sha1(data*num_threads).hexdigest() - - def hash_in_chunks(chunk_size): - index = 0 - while index < len(data): - hasher.update(data[index:index + chunk_size]) - index += chunk_size + + # The data to hash has length s|M|q^N and the chunk size for the i-th + # thread is s|M|q^(N-i), where N is the number of threads, M is a fixed + # message of small length, and s >= 1 and q >= 2 are small integers. + smallest_size, num_threads, s, q = 8, 5, 2, 10 + + smallest_data = os.urandom(smallest_size) + data = s * smallest_data * (q ** num_threads) + + h1 = constructor(usedforsecurity=False) + h2 = constructor(data * num_threads, usedforsecurity=False) + + def update(chunk_size): + for index in range(0, len(data), chunk_size): + h1.update(data[index:index + chunk_size]) threads = [] - for threadnum in range(num_threads): - chunk_size = len(data) // (10 ** threadnum) + for thread_num in range(num_threads): + # chunk_size = len(data) // (q ** thread_num) + chunk_size = s * smallest_size * q ** (num_threads - thread_num) self.assertGreater(chunk_size, 0) - self.assertEqual(chunk_size % len(smallest_data), 0) - thread = threading.Thread(target=hash_in_chunks, - args=(chunk_size,)) + self.assertEqual(chunk_size % smallest_size, 0) + thread = threading.Thread(target=update, args=(chunk_size,)) threads.append(thread) for thread in threads: @@ -1093,7 +1111,10 @@ def hash_in_chunks(chunk_size): for thread in threads: thread.join() - self.assertEqual(expected_hash, hasher.hexdigest()) + if is_shake: + self.assertEqual(h1.hexdigest(16), h2.hexdigest(16)) + else: + self.assertEqual(h1.hexdigest(), h2.hexdigest()) def test_get_fips_mode(self): fips_mode = self.is_fips_mode diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 20eba9dd77bafb..0fe69ee8fac835 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -278,21 +278,15 @@ get_hashlib_state(PyObject *module) } typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* OpenSSL context lock */ } HASHobject; #define HASHobject_CAST(op) ((HASHobject *)(op)) typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD HMAC_CTX *ctx; /* OpenSSL hmac context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* HMAC context lock */ } HMACobject; #define HMACobject_CAST(op) ((HMACobject *)(op)) @@ -700,9 +694,9 @@ static int _hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = EVP_MD_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (result == 0) { notify_smart_ssl_error_occurred_in(Py_STRINGIFY(EVP_MD_CTX_copy)); return -1; @@ -802,27 +796,13 @@ _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) { int result; Py_buffer view; - GET_BUFFER_VIEW_OR_ERROUT(obj, &view); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - result = _hashlib_HASH_hash(self, view.buf, view.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - result = _hashlib_HASH_hash(self, view.buf, view.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); PyBuffer_Release(&view); - - if (result == -1) - return NULL; - Py_RETURN_NONE; + return result < 0 ? NULL : Py_None; } static PyMethodDef HASH_methods[] = { @@ -1144,15 +1124,12 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, } if (view.buf && view.len) { - if (view.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - result = _hashlib_HASH_hash(self, view.buf, view.len); - Py_END_ALLOW_THREADS - } else { - result = _hashlib_HASH_hash(self, view.buf, view.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); if (result == -1) { assert(PyErr_Occurred()); Py_CLEAR(self); @@ -1813,9 +1790,9 @@ static int locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = HMAC_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (result == 0) { notify_smart_ssl_error_occurred_in(Py_STRINGIFY(HMAC_CTX_copy)); return -1; @@ -1846,24 +1823,12 @@ _hmac_update(HMACobject *self, PyObject *obj) Py_buffer view = {0}; GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + r = HMAC_Update( + self->ctx, (const unsigned char *)view.buf, (size_t)view.len + ) + ); PyBuffer_Release(&view); if (r == 0) { diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 2ce8c0cd3d7b6f..295bca07650916 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -352,7 +352,7 @@ type_to_impl(PyTypeObject *type) } typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; @@ -364,8 +364,6 @@ typedef struct { #endif }; blake2_impl impl; - bool use_mutex; - PyMutex mutex; } Blake2Object; #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) @@ -422,7 +420,7 @@ new_Blake2Object(PyTypeObject *type) } while (0) static void -update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) +blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since @@ -646,14 +644,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, if (data != NULL) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { - Py_BEGIN_ALLOW_THREADS - update(self, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(self, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -744,7 +740,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, } static int -blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) +blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); #define BLAKE2_COPY(TYPE, STATE_ATTR) \ @@ -801,9 +797,9 @@ _blake2_blake2b_copy_impl(Blake2Object *self) return NULL; } - ENTER_HASHLIB(self); - rc = blake2_blake2b_copy_locked(self, cpy); - LEAVE_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); + rc = blake2_blake2b_copy_unlocked(self, cpy); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(cpy); return NULL; @@ -825,25 +821,12 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) /*[clinic end generated code: output=99330230068e8c99 input=ffc4aa6a6a225d31]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } - else { - update(self, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ); PyBuffer_Release(&buf); - Py_RETURN_NONE; } @@ -881,9 +864,9 @@ _blake2_blake2b_digest_impl(Blake2Object *self) /*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ { uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); digest_length = blake2_blake2b_compute_digest(self, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, digest_length); } @@ -898,9 +881,9 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) /*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/ { uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); digest_length = blake2_blake2b_compute_digest(self, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, digest_length); } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index e82ec92be25c57..9a7e72f34a7f9d 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -34,47 +34,78 @@ /* * Helper code to synchronize access to the hash object when the GIL is - * released around a CPU consuming hashlib operation. All code paths that - * access a mutable part of obj must be enclosed in an ENTER_HASHLIB / - * LEAVE_HASHLIB block or explicitly acquire and release the lock inside - * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for - * an operation. + * released around a CPU consuming hashlib operation. * - * These only drop the GIL if the lock acquisition itself is likely to - * block. Thus the non-blocking acquire gating the GIL release for a - * blocking lock acquisition. The intent of these macros is to surround - * the assumed always "fast" operations that you aren't releasing the - * GIL around. Otherwise use code similar to what you see in hash - * function update() methods. + * Code accessing a mutable part of the hash object must be enclosed in + * an HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release + * the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if + * they wish to release the GIL for an operation. */ -#include "pythread.h" -#define ENTER_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Lock(&(obj)->mutex); \ - } -#define LEAVE_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Unlock(&(obj)->mutex); \ - } +#define HASHLIB_OBJECT_HEAD \ + PyObject_HEAD \ + /* Guard against race conditions during incremental update(). */ \ + PyMutex mutex; -#ifdef Py_GIL_DISABLED -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = true; \ +#define HASHLIB_INIT_MUTEX(OBJ) \ + do { \ + (OBJ)->mutex = (PyMutex){0}; \ } while (0) -#else -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = false; \ + +#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex) +#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex) + +/* + * Message length above which the GIL is to be released + * when performing hashing operations. + */ +#define HASHLIB_GIL_MINSIZE 2048 + +// Macros for executing code while conditionally holding the GIL. +// +// These only drop the GIL if the lock acquisition itself is likely to +// block. Thus the non-blocking acquire gating the GIL release for a +// blocking lock acquisition. The intent of these macros is to surround +// the assumed always "fast" operations that you aren't releasing the +// GIL around. + +/* + * Execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. + */ +#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + STATEMENTS; \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + STATEMENTS; \ + } \ } while (0) -#endif -/* TODO(gpshead): We should make this a module or class attribute - * to allow the user to optimize based on the platform they're using. */ -#define HASHLIB_GIL_MINSIZE 2048 +/* + * Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. + */ +#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + } \ + } while (0) static inline int _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f6ed35ae742905..e7a5ccbb19b45c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -215,105 +215,6 @@ typedef struct py_hmac_hacl_api { #define Py_CHECK_HACL_UINT32_T_LENGTH(LEN) #endif -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * The magnitude of 'LEN' is not checked and thus 'LEN' must be - * safely convertible to a uint32_t value. - */ -#define Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN) \ - Hacl_Streaming_HMAC_update(HACL_STATE, BUF, (uint32_t)(LEN)) - -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * On DEBUG builds, the 'ERRACTION' statements are executed if - * the update() call returned a non-successful HACL* exit code. - * - * The buffer 'BUF' and its length 'LEN' are left untouched. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, uint32_t, (C statements)) - */ -#ifndef NDEBUG -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ - hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code) < 0) { \ - ERRACTION; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - _ERRACTION \ -) \ - do { \ - (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ - } while (0) -#endif - -/* - * Repetivively call the HACL* HMAC-HASH update function on the given - * data until the buffer length 'LEN' is strictly less than UINT32_MAX. - * - * On builds with PY_SSIZE_T_MAX <= UINT32_MAX, this is a no-op. - * - * The buffer 'BUF' (resp. 'LEN') is advanced (resp. decremented) - * by UINT32_MAX after each update. On DEBUG builds, each update() - * call is verified and the 'ERRACTION' statements are executed if - * a non-successful HACL* exit code is being returned. - * - * In particular, 'BUF' and 'LEN' must be variable names and not - * expressions on their own. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) - */ -#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ERRACTION); \ - BUF += UINT32_MAX; \ - LEN -= UINT32_MAX; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - _ERRACTION \ -) -#endif - -/* - * Perform the HMAC-HASH update() operation in a streaming fashion. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) - */ -#define Py_HMAC_HACL_UPDATE( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ERRACTION); \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ERRACTION); \ - } while (0) - /* * HMAC underlying hash function static information. */ @@ -382,11 +283,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls) typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { - PyObject_HEAD - - bool use_mutex; - PyMutex mutex; - + HASHLIB_OBJECT_HEAD // Hash function information PyObject *name; // rendered name (exact unicode object) HMAC_Hash_Kind kind; // can be used for runtime dispatch (must be known) @@ -556,6 +453,51 @@ _hacl_hmac_state_free(HACL_HMAC_state *state) } } +/* + * Call the HACL* HMAC-HASH update function on the given data. + * + * On DEBUG builds, the update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. +*/ +static int +_hacl_hmac_state_update_once(HACL_HMAC_state *state, + uint8_t *buf, uint32_t len) +{ +#ifndef NDEBUG + hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, len); + return _hacl_convert_errno(code); +#else + (void)Hacl_Streaming_HMAC_update(state, buf, len); + return 0; +#endif +} + +/* + * Perform the HMAC-HASH update() operation in a streaming fashion. + * + * On DEBUG builds, each update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. + */ +static int +_hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) +{ + assert(len >= 0); +#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 + while (len > UINT32_MAX_AS_SSIZE_T) { + if (_hacl_hmac_state_update_once(state, buf, UINT32_MAX) < 0) { + assert(PyErr_Occurred()); + return -1; + } + buf += UINT32_MAX; + len -= UINT32_MAX; + } +#endif + Py_CHECK_HACL_UINT32_T_LENGTH(len); + return _hacl_hmac_state_update_once(state, buf, (uint32_t)len); +} + /* Static information used to construct the hash table. */ static const py_hmac_hinfo py_hmac_static_hinfo[] = { #define Py_HMAC_HINFO_HACL_API(HACL_HID) \ @@ -786,45 +728,6 @@ hmac_new_initial_state(HMACObject *self, uint8_t *key, Py_ssize_t len) return self->state == NULL ? -1 : 0; } -/* - * Feed initial data. - * - * This function MUST only be called by the HMAC object constructor - * and after hmac_set_hinfo() and hmac_new_initial_state() have been - * called, lest the behaviour is undefined. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) -{ - assert(self->name != NULL); - assert(self->state != NULL); - if (len == 0) { - // do nothing if the buffer is empty - return 0; - } - - if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, return -1); - return 0; - } - - int res = 0; - Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - Py_END_ALLOW_THREADS - return res; -} - /*[clinic input] _hmac.new @@ -871,7 +774,12 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, if (msgobj != NULL && msgobj != Py_None) { Py_buffer msg; GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error); - rc = hmac_feed_initial_data(self, msg.buf, msg.len); + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + msg.len, + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ); PyBuffer_Release(&msg); #ifndef NDEBUG if (rc < 0) { @@ -948,12 +856,12 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); /* copy hash information */ hmac_copy_hinfo(copy, self); /* copy internal state */ int rc = hmac_copy_state(copy, self); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(copy); @@ -965,78 +873,6 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return (PyObject *)copy; } -/* - * Update the HMAC object with the given buffer. - * - * This unconditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - int res = 0; - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - return res; -} - -/* - * Update the HMAC object with the given buffer. - * - * This conditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); - LEAVE_HASHLIB(self); - return 0; - -#ifndef NDEBUG -error: - LEAVE_HASHLIB(self); - return -1; -#else - Py_UNREACHABLE(); -#endif -} - -/* - * Update the internal HMAC state with the given buffer. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static inline int -hmac_update_state(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - assert(buf != 0); - assert(len >= 0); - return len == 0 - ? 0 /* nothing to do */ - : len < HASHLIB_GIL_MINSIZE - ? hmac_update_state_cond_lock(self, buf, len) - : hmac_update_state_with_lock(self, buf, len); -} - /*[clinic input] _hmac.HMAC.update @@ -1049,9 +885,13 @@ static PyObject * _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) /*[clinic end generated code: output=962134ada5e55985 input=7c0ea830efb03367]*/ { + int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - int rc = hmac_update_state(self, msg.buf, msg.len); + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, msg.len, + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ); PyBuffer_Release(&msg); return rc < 0 ? NULL : Py_None; } @@ -1067,13 +907,13 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) * Note: this function may raise a MemoryError. */ static int -hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) +hmac_digest_compute_locked(HMACObject *self, uint8_t *digest) { assert(digest != NULL); hacl_errno_t rc; - ENTER_HASHLIB(self); // conditionally acquire a lock + HASHLIB_ACQUIRE_LOCK(self); rc = Hacl_Streaming_HMAC_digest(self->state, digest, self->digest_size); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); assert( rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory @@ -1095,7 +935,7 @@ _hmac_HMAC_digest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return PyBytes_FromStringAndSize((const char *)digest, self->digest_size); @@ -1118,7 +958,7 @@ _hmac_HMAC_hexdigest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return _Py_strhex((const char *)digest, self->digest_size); diff --git a/Modules/md5module.c b/Modules/md5module.c index 6ec0ee87a2f761..5dac3a91f4f09d 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -38,12 +38,8 @@ class MD5Type "MD5object *" "&PyType_Type" #include "_hacl/Hacl_Hash_MD5.h" - typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_OBJECT_HEAD Hacl_Hash_MD5_state_t *hash_state; } MD5object; @@ -118,9 +114,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -139,9 +135,9 @@ MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { uint8_t digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -156,9 +152,9 @@ MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { uint8_t digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } @@ -170,6 +166,7 @@ update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) * take more than 1 billion years to overflow the maximum admissible length * for MD5 (2^61 - 1). */ + assert(len >= 0); #if PY_SSIZE_T_MAX > UINT32_MAX while (len > UINT32_MAX) { (void)Hacl_Hash_MD5_update(state, buf, UINT32_MAX); @@ -195,22 +192,11 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) /*[clinic end generated code: output=b0fed9a7ce7ad253 input=6e1efcd9ecf17032]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -312,16 +298,12 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update(new->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index a746bf74f8d4c1..3bc83b674f1bc1 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -38,11 +38,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" #include "_hacl/Hacl_Hash_SHA1.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; - PyThread_type_lock lock; + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA1_state_t *hash_state; } SHA1object; @@ -121,9 +117,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA1_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -142,9 +138,9 @@ SHA1Type_digest_impl(SHA1object *self) /*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE); } @@ -159,9 +155,9 @@ SHA1Type_hexdigest_impl(SHA1object *self) /*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } @@ -198,22 +194,11 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) /*[clinic end generated code: output=cdc8e0e106dbec5f input=aad8e07812edbba3]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -314,16 +299,12 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update(new->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 72931910c5d720..6643b7e6b02654 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -50,20 +50,14 @@ class SHA512Type "SHA512object *" "&PyType_Type" // TODO: Get rid of int digestsize in favor of Hacl state info? typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_256 *state; } SHA256object; typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_512 *state; } SHA512object; @@ -272,9 +266,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA256copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -309,9 +303,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA512copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -331,11 +325,11 @@ SHA256Type_digest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -351,11 +345,11 @@ SHA512Type_digest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -371,9 +365,9 @@ SHA256Type_hexdigest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -389,9 +383,9 @@ SHA512Type_hexdigest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -409,22 +403,11 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) /*[clinic end generated code: output=dc58a580cf8905a5 input=b2d449d5b30f0f5a]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_256(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update_256(self->state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update_256(self->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -443,22 +426,11 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) /*[clinic end generated code: output=9af211766c0b7365 input=ded2b46656566283]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_512(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update_512(self->state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update_512(self->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -638,16 +610,12 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_256(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -700,16 +668,12 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_256(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -763,16 +727,12 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_512(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_512(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -826,16 +786,12 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_512(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_512(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 9946024fe2e848..68b239352769a1 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -70,10 +70,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" #include "_hacl/Hacl_Hash_SHA3.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; } SHA3object; @@ -174,16 +171,12 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, if (data) { GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - sha3_update(self->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + sha3_update(self->hash_state, buf.buf, buf.len) + ); } PyBuffer_Release(&buf); @@ -249,9 +242,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self) if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA3_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -273,9 +266,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self) unsigned char digest[SHA3_MAX_DIGESTSIZE]; // This function errors out if the algorithm is SHAKE. Here, we know this // not to be the case, and therefore do not perform error checking. - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -292,9 +285,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) /*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/ { unsigned char digest[SHA3_MAX_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -314,22 +307,11 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) /*[clinic end generated code: output=390b7abf7c9795a5 input=a887f54dcc4ae227]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - sha3_update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - sha3_update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + sha3_update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -531,9 +513,9 @@ _sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length) CHECK_HACL_UINT32_T_LENGTH(length); PyObject *digest = PyBytes_FromStringAndSize(NULL, length); uint8_t *buffer = (uint8_t *)PyBytes_AS_STRING(digest); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return digest; } @@ -565,9 +547,9 @@ _sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length) return PyErr_NoMemory(); } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); PyObject *digest = _Py_strhex((const char *)buffer, length); PyMem_Free(buffer); return digest; From b57b619e34cdfc87b47943c988b0b4d69f8f1fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 21:48:06 +0200 Subject: [PATCH 650/989] gh-135815: skip `netrc` security checks if `os.getuid` is missing (#135816) --- Doc/library/netrc.rst | 4 ++- Lib/netrc.py | 29 ++++++++++++------- Lib/test/test_netrc.py | 13 +++++---- ...-06-22-16-23-44.gh-issue-135815.0DandH.rst | 2 ++ 4 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst index f6260383b2b057..74c97e8c9a9759 100644 --- a/Doc/library/netrc.rst +++ b/Doc/library/netrc.rst @@ -24,12 +24,14 @@ the Unix :program:`ftp` program and other FTP clients. a :exc:`FileNotFoundError` exception will be raised. Parse errors will raise :exc:`NetrcParseError` with diagnostic information including the file name, line number, and terminating token. + If no argument is specified on a POSIX system, the presence of passwords in the :file:`.netrc` file will raise a :exc:`NetrcParseError` if the file ownership or permissions are insecure (owned by a user other than the user running the process, or accessible for read or write by any other user). This implements security behavior equivalent to that of ftp and other - programs that use :file:`.netrc`. + programs that use :file:`.netrc`. Such security checks are not available + on platforms that do not support :func:`os.getuid`. .. versionchanged:: 3.4 Added the POSIX permission check. diff --git a/Lib/netrc.py b/Lib/netrc.py index b285fd8e357ddb..bd003e80a48081 100644 --- a/Lib/netrc.py +++ b/Lib/netrc.py @@ -7,6 +7,19 @@ __all__ = ["netrc", "NetrcParseError"] +def _can_security_check(): + # On WASI, getuid() is indicated as a stub but it may also be missing. + return os.name == 'posix' and hasattr(os, 'getuid') + + +def _getpwuid(uid): + try: + import pwd + return pwd.getpwuid(uid)[0] + except (ImportError, LookupError): + return f'uid {uid}' + + class NetrcParseError(Exception): """Exception raised on syntax errors in the .netrc file.""" def __init__(self, msg, filename=None, lineno=None): @@ -142,18 +155,12 @@ def _parse(self, file, fp, default_netrc): self._security_check(fp, default_netrc, self.hosts[entryname][0]) def _security_check(self, fp, default_netrc, login): - if os.name == 'posix' and default_netrc and login != "anonymous": + if _can_security_check() and default_netrc and login != "anonymous": prop = os.fstat(fp.fileno()) - if prop.st_uid != os.getuid(): - import pwd - try: - fowner = pwd.getpwuid(prop.st_uid)[0] - except KeyError: - fowner = 'uid %s' % prop.st_uid - try: - user = pwd.getpwuid(os.getuid())[0] - except KeyError: - user = 'uid %s' % os.getuid() + current_user_id = os.getuid() + if prop.st_uid != current_user_id: + fowner = _getpwuid(prop.st_uid) + user = _getpwuid(current_user_id) raise NetrcParseError( (f"~/.netrc file owner ({fowner}, {user}) does not match" " current user")) diff --git a/Lib/test/test_netrc.py b/Lib/test/test_netrc.py index 81e11a293cc4c8..9d720f627102e3 100644 --- a/Lib/test/test_netrc.py +++ b/Lib/test/test_netrc.py @@ -1,11 +1,7 @@ import netrc, os, unittest, sys, textwrap +from test import support from test.support import os_helper -try: - import pwd -except ImportError: - pwd = None - temp_filename = os_helper.TESTFN class NetrcTestCase(unittest.TestCase): @@ -269,9 +265,14 @@ def test_comment_at_end_of_machine_line_pass_has_hash(self): machine bar.domain.com login foo password pass """, '#pass') + @unittest.skipUnless(support.is_wasi, 'WASI only test') + def test_security_on_WASI(self): + self.assertFalse(netrc._can_security_check()) + self.assertEqual(netrc._getpwuid(0), 'uid 0') + self.assertEqual(netrc._getpwuid(123456), 'uid 123456') @unittest.skipUnless(os.name == 'posix', 'POSIX only test') - @unittest.skipIf(pwd is None, 'security check requires pwd module') + @unittest.skipUnless(hasattr(os, 'getuid'), "os.getuid is required") @os_helper.skip_unless_working_chmod def test_security(self): # This test is incomplete since we are normally not run as root and diff --git a/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst new file mode 100644 index 00000000000000..0f4a68bf745a8e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst @@ -0,0 +1,2 @@ +:mod:`netrc`: skip security checks if :func:`os.getuid` is missing. +Patch by Bénédikt Tran. From 621a8bd6a8d97cf4c628f8a0b869ff0e83930319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 22:04:38 +0200 Subject: [PATCH 651/989] gh-135532: cleanup clinic `module` directives for cryptographic modules (#135822) --- Modules/_hashopenssl.c | 17 +++++++++++------ Modules/blake2module.c | 12 ++++++++---- Modules/md5module.c | 24 ++++++++++++++++-------- Modules/sha1module.c | 25 ++++++++++++++++--------- Modules/sha2module.c | 30 +++++++++++++++++------------- Modules/sha3module.c | 33 +++++++++++++++++++++------------ 6 files changed, 89 insertions(+), 52 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 0fe69ee8fac835..90a7391ebb09af 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -255,7 +255,8 @@ py_hashentry_table_new(void) { return NULL; } -/* Module state */ +// --- Module state ----------------------------------------------------------- + static PyModuleDef _hashlibmodule; typedef struct { @@ -277,6 +278,8 @@ get_hashlib_state(PyObject *module) return (_hashlibstate *)state; } +// --- Module objects --------------------------------------------------------- + typedef struct { HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ @@ -291,15 +294,17 @@ typedef struct { #define HMACobject_CAST(op) ((HMACobject *)(op)) -#include "clinic/_hashopenssl.c.h" +// --- Module clinic configuration -------------------------------------------- + /*[clinic input] module _hashlib -class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASH_type" -class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASHXOF_type" -class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMAC_type" +class _hashlib.HASH "HASHobject *" "&PyType_Type" +class _hashlib.HASHXOF "HASHobject *" "&PyType_Type" +class _hashlib.HMAC "HMACobject *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=eb805ce4b90b1b31]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6b5c9ce5c28bdc58]*/ +#include "clinic/_hashopenssl.c.h" /* LCOV_EXCL_START */ diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 295bca07650916..6c4349ac06bb8a 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -2,6 +2,7 @@ * Written in 2013 by Dmitry Chestnykh * Modified for CPython by Christian Heimes * Updated to use HACL* by Jonathan Protzenko + * Additional work by Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * * To the extent possible under law, the author have dedicated all * copyright and related and neighboring rights to this software to @@ -368,15 +369,18 @@ typedef struct { #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) -#include "clinic/blake2module.c.h" +// --- Module clinic configuration -------------------------------------------- /*[clinic input] module _blake2 -class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType" -class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType" +class _blake2.blake2b "Blake2Object *" "&PyType_Type" +class _blake2.blake2s "Blake2Object *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=86b0972b0c41b3d0]*/ + +#include "clinic/blake2module.c.h" +// --- BLAKE-2 object interface ----------------------------------------------- static Blake2Object * new_Blake2Object(PyTypeObject *type) diff --git a/Modules/md5module.c b/Modules/md5module.c index 5dac3a91f4f09d..8b6dd4a8195dfb 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -8,6 +8,7 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. @@ -25,18 +26,14 @@ #include "hashlib.h" -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ +#include "_hacl/Hacl_Hash_MD5.h" /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 #define MD5_DIGESTSIZE 16 -#include "_hacl/Hacl_Hash_MD5.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -45,8 +42,7 @@ typedef struct { #define _MD5object_CAST(op) ((MD5object *)(op)) -#include "clinic/md5module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* md5_type; @@ -60,6 +56,18 @@ md5_get_state(PyObject *module) return (MD5State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ + +#include "clinic/md5module.c.h" + +// --- MD5 object interface --------------------------------------------------- + static MD5object * newMD5object(MD5State * st) { diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 3bc83b674f1bc1..faa9dcccc5755b 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -8,13 +8,13 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. */ -/* SHA1 objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif @@ -24,18 +24,14 @@ #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" // _PyType_GetModuleState() -/*[clinic input] -module _sha1 -class SHA1Type "SHA1object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ +#include "_hacl/Hacl_Hash_SHA1.h" /* The SHA1 block size and message digest sizes, in bytes */ #define SHA1_BLOCKSIZE 64 #define SHA1_DIGESTSIZE 20 -#include "_hacl/Hacl_Hash_SHA1.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -44,8 +40,7 @@ typedef struct { #define _SHA1object_CAST(op) ((SHA1object *)(op)) -#include "clinic/sha1module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* sha1_type; @@ -59,6 +54,18 @@ sha1_get_state(PyObject *module) return (SHA1State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha1 +class SHA1Type "SHA1object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ + +#include "clinic/sha1module.c.h" + +// --- SHA-1 object interface configuration ----------------------------------- + static SHA1object * newSHA1object(SHA1State *st) { diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 6643b7e6b02654..36300ba899fd44 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -9,32 +9,25 @@ Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) Jonathan Protzenko (jonathan@protzenko.fr) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. */ -/* SHA objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" -#include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "pycore_strhex.h" // _Py_strhex() #include "hashlib.h" -/*[clinic input] -module _sha2 -class SHA256Type "SHA256object *" "&PyType_Type" -class SHA512Type "SHA512object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ - +#include "_hacl/Hacl_Hash_SHA2.h" /* The SHA block sizes and maximum message digest sizes, in bytes */ @@ -43,9 +36,7 @@ class SHA512Type "SHA512object *" "&PyType_Type" #define SHA512_BLOCKSIZE 128 #define SHA512_DIGESTSIZE 64 -/* Our SHA2 implementations defer to the HACL* verified library. */ - -#include "_hacl/Hacl_Hash_SHA2.h" +// --- Module objects --------------------------------------------------------- // TODO: Get rid of int digestsize in favor of Hacl state info? @@ -64,7 +55,7 @@ typedef struct { #define _SHA256object_CAST(op) ((SHA256object *)(op)) #define _SHA512object_CAST(op) ((SHA512object *)(op)) -#include "clinic/sha2module.c.h" +// --- Module state ----------------------------------------------------------- /* We shall use run-time type information in the remainder of this module to * tell apart SHA2-224 and SHA2-256 */ @@ -83,6 +74,19 @@ sha2_get_state(PyObject *module) return (sha2_state *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha2 +class SHA256Type "SHA256object *" "&PyType_Type" +class SHA512Type "SHA512object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ + +#include "clinic/sha2module.c.h" + +// --- SHA-2 object interface ------------------------------------------------- + static int SHA256copy(SHA256object *src, SHA256object *dest) { diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 68b239352769a1..face90a6094beb 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -9,6 +9,7 @@ * Greg Stein (gstein@lyra.org) * Trevor Perrin (trevp@trevp.net) * Gregory P. Smith (greg@krypto.org) + * Bénédikt Tran (10796600+picnixz@users.noreply.github.com) * * Copyright (C) 2012-2022 Christian Heimes (christian@python.org) * Licensed to PSF under a Contributor Agreement. @@ -24,6 +25,8 @@ #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "hashlib.h" +#include "_hacl/Hacl_Hash_SHA3.h" + /* * Assert that 'LEN' can be safely casted to uint32_t. * @@ -37,6 +40,8 @@ #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ +// --- Module state ----------------------------------------------------------- + typedef struct { PyTypeObject *sha3_224_type; PyTypeObject *sha3_256_type; @@ -54,21 +59,10 @@ sha3_get_state(PyObject *module) return (SHA3State *)state; } -/*[clinic input] -module _sha3 -class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ" -class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ" -class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ" -class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ" -class _sha3.shake_128 "SHA3object *" "&SHAKE128type" -class _sha3.shake_256 "SHA3object *" "&SHAKE256type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/ +// --- Module objects --------------------------------------------------------- /* The structure for storing SHA3 info */ -#include "_hacl/Hacl_Hash_SHA3.h" - typedef struct { HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; @@ -76,8 +70,23 @@ typedef struct { #define _SHA3object_CAST(op) ((SHA3object *)(op)) +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha3 +class _sha3.sha3_224 "SHA3object *" "&PyType_Type" +class _sha3.sha3_256 "SHA3object *" "&PyType_Type" +class _sha3.sha3_384 "SHA3object *" "&PyType_Type" +class _sha3.sha3_512 "SHA3object *" "&PyType_Type" +class _sha3.shake_128 "SHA3object *" "&PyType_Type" +class _sha3.shake_256 "SHA3object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ccd22550c7fb99bf]*/ + #include "clinic/sha3module.c.h" +// --- SHA-3 object interface ------------------------------------------------- + static SHA3object * newSHA3object(PyTypeObject *type) { From 0d9d48959e050b66cb37a333940ebf4dc2a74e15 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 23 Jun 2025 11:52:28 +0530 Subject: [PATCH 652/989] add async generators section to asyncio internal docs (#135674) --- InternalDocs/asyncio.md | 126 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/InternalDocs/asyncio.md b/InternalDocs/asyncio.md index b60fe70478a6bc..22159852ca54db 100644 --- a/InternalDocs/asyncio.md +++ b/InternalDocs/asyncio.md @@ -2,10 +2,12 @@ asyncio ======= -This document describes the working and implementation details of C -implementation of the +This document describes the working and implementation details of the [`asyncio`](https://docs.python.org/3/library/asyncio.html) module. +**The following section describes the implementation details of the C implementation**. + +# Task management ## Pre-Python 3.14 implementation @@ -158,7 +160,8 @@ flowchart TD subgraph two["Thread deallocating"] A1{"thread's task list empty?
llist_empty(tstate->asyncio_tasks_head)"} A1 --> |true| B1["deallocate thread
free_threadstate(tstate)"] - A1 --> |false| C1["add tasks to interpreter's task list
llist_concat(&tstate->interp->asyncio_tasks_head,tstate->asyncio_tasks_head)"] + A1 --> |false| C1["add tasks to interpreter's task list
llist_concat(&tstate->interp->asyncio_tasks_head, + &tstate->asyncio_tasks_head)"] C1 --> B1 end @@ -205,6 +208,121 @@ In free-threading, it avoids contention on a global dictionary as threads can access the current task of thier running loop without any locking. +--- + +**The following section describes the implementation details of the Python implementation**. + +# async generators + +This section describes the implementation details of async generators in `asyncio`. + +Since async generators are meant to be used from coroutines, +their finalization (execution of finally blocks) needs +to be done while the loop is running. +Most async generators are closed automatically +when they are fully iterated over and exhausted; however, +if the async generator is not fully iterated over, +it may not be closed properly, leading to the `finally` blocks not being executed. + +Consider the following code: +```py +import asyncio + +async def agen(): + try: + yield 1 + finally: + await asyncio.sleep(1) + print("finally executed") + + +async def main(): + async for i in agen(): + break + +loop = asyncio.EventLoop() +loop.run_until_complete(main()) +``` + +The above code will not print "finally executed", because the +async generator `agen` is not fully iterated over +and it is not closed manually by awaiting `agen.aclose()`. + +To solve this, `asyncio` uses the `sys.set_asyncgen_hooks` function to +set hooks for finalizing async generators as described in +[PEP 525](https://peps.python.org/pep-0525/). + +- **firstiter hook**: When the async generator is iterated over for the first time, +the *firstiter hook* is called. The async generator is added to `loop._asyncgens` WeakSet +and the event loop tracks all active async generators. + +- **finalizer hook**: When the async generator is about to be finalized, +the *finalizer hook* is called. The event loop removes the async generator +from `loop._asyncgens` WeakSet, and schedules the finalization of the async +generator by creating a task calling `agen.aclose()`. This ensures that the +finally block is executed while the event loop is running. When the loop is +shutting down, the loop checks if there are active async generators and if so, +it similarly schedules the finalization of all active async generators by calling +`agen.aclose()` on each of them and waits for them to complete before shutting +down the loop. + +This ensures that the async generator's `finally` blocks are executed even +if the generator is not explicitly closed. + +Consider the following example: + +```python +import asyncio + +async def agen(): + try: + yield 1 + yield 2 + finally: + print("executing finally block") + +async def main(): + async for item in agen(): + print(item) + break # not fully iterated + +asyncio.run(main()) +``` + +```mermaid +flowchart TD + subgraph one["Loop running"] + A["asyncio.run(main())"] --> B + B["set async generator hooks
sys.set_asyncgen_hooks()"] --> C + C["async for item in agen"] --> F + F{"first iteration?"} --> |true|D + F{"first iteration?"} --> |false|H + D["calls firstiter hook
loop._asyncgen_firstiter_hook(agen)"] --> E + E["add agen to WeakSet
loop._asyncgens.add(agen)"] --> H + H["item = await agen.\_\_anext\_\_()"] --> J + J{"StopAsyncIteration?"} --> |true|M + J{"StopAsyncIteration?"} --> |false|I + I["print(item)"] --> S + S{"continue iterating?"} --> |true|C + S{"continue iterating?"} --> |false|M + M{"agen is no longer referenced?"} --> |true|N + M{"agen is no longer referenced?"} --> |false|two + N["finalize agen
_PyGen_Finalize(agen)"] --> O + O["calls finalizer hook
loop._asyncgen_finalizer_hook(agen)"] --> P + P["remove agen from WeakSet
loop._asyncgens.discard(agen)"] --> Q + Q["schedule task to close it
self.create_task(agen.aclose())"] --> R + R["print('executing finally block')"] --> E1 + + end + + subgraph two["Loop shutting down"] + A1{"check for alive async generators?"} --> |true|B1 + B1["close all async generators
await asyncio.gather\(*\[ag.aclose\(\) for ag in loop._asyncgens\]"] --> R + A1{"check for alive async generators?"} --> |false|E1 + E1["loop.close()"] + end + +``` [^1]: https://github.com/python/cpython/issues/123089 -[^2]: https://github.com/python/cpython/issues/80788 \ No newline at end of file +[^2]: https://github.com/python/cpython/issues/80788 From 6ab842fce50a6125797bcddfc4a4b2622aa6c6a9 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Mon, 23 Jun 2025 06:28:05 -0400 Subject: [PATCH 653/989] gh-134986: Catch PermissionError when trying to call perf in tests (#134987) Using Ubuntu 24.04 on the Windows Subsystem for Linux, perf will raise a `PermissionError` instead of `FileNotFoundError`. This commit modifies the tests to catch that. --- Lib/test/test_perf_profiler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index 21d097dbb559ec..7529c853f9c152 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -506,9 +506,12 @@ def _is_perf_version_at_least(major, minor): # The output of perf --version looks like "perf version 6.7-3" but # it can also be perf version "perf version 5.15.143", or even include # a commit hash in the version string, like "6.12.9.g242e6068fd5c" + # + # PermissionError is raised if perf does not exist on the Windows Subsystem + # for Linux, see #134987 try: output = subprocess.check_output(["perf", "--version"], text=True) - except (subprocess.CalledProcessError, FileNotFoundError): + except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): return False version = output.split()[2] version = version.split("-")[0] From 6aa0826ed7688e5f40742cdcaf57420b284e194f Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 23 Jun 2025 12:35:59 +0200 Subject: [PATCH 654/989] gh-89488: Add warning about `Py_BuildValue("p")` needing exact `int` (GH-135610) --- Doc/c-api/arg.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 49dbc8d71cce62..ab9f9c4539ae9a 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -686,6 +686,12 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + Be aware that this format requires an ``int`` argument. + Unlike most other contexts in C, variadic arguments are not coerced to + a suitable type automatically. + You can convert another type (for example, a pointer or a float) to a + suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] From 396ca9a641429e59e09d3a3c28dc85623197797f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Jun 2025 12:49:27 +0200 Subject: [PATCH 655/989] gh-135823: improve error message in `netrc` security checks (#135827) --- Lib/netrc.py | 4 ++-- .../Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst diff --git a/Lib/netrc.py b/Lib/netrc.py index bd003e80a48081..2f502c1d53364f 100644 --- a/Lib/netrc.py +++ b/Lib/netrc.py @@ -162,8 +162,8 @@ def _security_check(self, fp, default_netrc, login): fowner = _getpwuid(prop.st_uid) user = _getpwuid(current_user_id) raise NetrcParseError( - (f"~/.netrc file owner ({fowner}, {user}) does not match" - " current user")) + f"~/.netrc file owner ({fowner}) does not match" + f" current user ({user})") if (prop.st_mode & (stat.S_IRWXG | stat.S_IRWXO)): raise NetrcParseError( "~/.netrc access too permissive: access" diff --git a/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst new file mode 100644 index 00000000000000..5b9d89caae7a55 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst @@ -0,0 +1,3 @@ +:mod:`netrc`: improve the error message when the security check for the +ownership of the default configuration file ``~/.netrc`` fails. Patch by +Bénédikt Tran. From b3ae76911df465ade76c267c5fa2a02cf1138b48 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 23 Jun 2025 15:29:30 +0300 Subject: [PATCH 656/989] Bump mypy to 1.16.1 (#135720) --- Lib/_pyrepl/base_eventqueue.py | 6 +++--- Tools/cases_generator/tier2_generator.py | 2 +- Tools/requirements-dev.txt | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index 842599bd1877fb..0589a0f437ec7c 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -87,7 +87,7 @@ def push(self, char: int | bytes) -> None: if isinstance(k, dict): self.keymap = k else: - self.insert(Event('key', k, self.flush_buf())) + self.insert(Event('key', k, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap elif self.buf and self.buf[0] == 27: # escape @@ -96,7 +96,7 @@ def push(self, char: int | bytes) -> None: # the docstring in keymap.py trace('unrecognized escape sequence, propagating...') self.keymap = self.compiled_keymap - self.insert(Event('key', '\033', bytearray(b'\033'))) + self.insert(Event('key', '\033', b'\033')) for _c in self.flush_buf()[1:]: self.push(_c) @@ -106,5 +106,5 @@ def push(self, char: int | bytes) -> None: except UnicodeError: return else: - self.insert(Event('key', decoded, self.flush_buf())) + self.insert(Event('key', decoded, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 276f306dfffa18..fc3bc47286f7f6 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -91,7 +91,7 @@ def deopt_if( self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 5bf180bb30a310..0beaab2d3e7157 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.15 +mypy==1.16.1 # needed for peg_generator: -types-psutil==6.0.0.20240901 -types-setuptools==74.0.0.20240831 +types-psutil==7.0.0.20250601 +types-setuptools==80.9.0.20250529 From 99712c45ccaab4bbe83eac38d73bab91bf0f4a02 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 23 Jun 2025 21:38:57 +0530 Subject: [PATCH 657/989] GH-124878: reenable `test_finalize_daemon_thread_hang` test under TSAN (#135793) --- Lib/test/test_threading.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 125c27446986c0..13b55d0f0a2e73 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1284,12 +1284,6 @@ def f(): @cpython_only def test_finalize_daemon_thread_hang(self): - if support.check_sanitizer(thread=True, memory=True): - # the thread running `time.sleep(100)` below will still be alive - # at process exit - self.skipTest( - "https://github.com/python/cpython/issues/124878 - Known" - " race condition that TSAN identifies.") # gh-87135: tests that daemon threads hang during finalization script = textwrap.dedent(''' import os From 569fc6870f048cb75469ae3cacb6ebcf5172a10e Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Tue, 24 Jun 2025 00:57:14 +0800 Subject: [PATCH 658/989] gh-134584: Specialize POP_TOP by reference and type in JIT (GH-135761) --- Include/internal/pycore_uop_ids.h | 90 ++++++++++--------- Include/internal/pycore_uop_metadata.h | 16 ++++ Lib/test/test_capi/test_opt.py | 40 +++++++++ ...-06-20-14-50-44.gh-issue-134584.3CJdAI.rst | 1 + Python/bytecodes.c | 21 +++++ Python/executor_cases.c.h | 40 +++++++++ Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 28 +++++- Python/optimizer_cases.c.h | 47 +++++++++- Python/optimizer_symbols.c | 3 - Tools/cases_generator/analyzer.py | 1 + .../opcode_metadata_generator.py | 12 +-- 12 files changed, 239 insertions(+), 62 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e19fb..a9432401525ebb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define _STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 +#define _STORE_FAST 524 +#define _STORE_FAST_0 525 +#define _STORE_FAST_1 526 +#define _STORE_FAST_2 527 +#define _STORE_FAST_3 528 +#define _STORE_FAST_4 529 +#define _STORE_FAST_5 530 +#define _STORE_FAST_6 531 +#define _STORE_FAST_7 532 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define _STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 533 +#define _STORE_SUBSCR 534 +#define _STORE_SUBSCR_DICT 535 +#define _STORE_SUBSCR_LIST_INT 536 +#define _SWAP 537 +#define _SWAP_2 538 +#define _SWAP_3 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785817..52cbc2fffe484e 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = 0, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case _POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 84e864b44b9544..0a85b19e06f158 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2392,6 +2392,46 @@ def testfunc(n): assert ex is not None """)) + def test_pop_top_specialize_none(self): + def testfunc(n): + for _ in range(n): + global_identity(None) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def test_pop_top_specialize_int(self): + def testfunc(n): + for _ in range(n): + global_identity(100000) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_INT", uops) + + def test_pop_top_specialize_float(self): + def testfunc(n): + for _ in range(n): + global_identity(1e6) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_FLOAT", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst new file mode 100644 index 00000000000000..715ac7dc9253d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst @@ -0,0 +1 @@ +Specialize :opcode:`POP_TOP` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38ccfcc..535e552e047475 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,27 @@ dummy_func( PyStackRef_XCLOSE(value); } + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550afe..46fc164a5b3bc2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,46 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b1a63e3d2916f..145a8c118d3612 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3f2e2e0351e052..f8fbaf232ffa2e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -534,7 +534,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { @@ -542,7 +542,7 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { @@ -561,6 +561,24 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -803,7 +821,9 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 91927180b3509d..1e581afadc9569 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -100,6 +100,47 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -784,7 +825,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2660,7 +2701,7 @@ case _LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2670,7 +2711,7 @@ case _POP_TOP_LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 64cc1b9074fcf0..c3d9e0e778bf55 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6ff0223d2ef3e7..6466d2615cd14e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc599..0bcdc5395dcd8e 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: From bda121862e7d7f4684d9f0281f7d1f408c0c740c Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 23 Jun 2025 22:42:09 +0300 Subject: [PATCH 659/989] gh-131798: Optimize `_UNARY_NEGATIVE` (GH-135223) --- Lib/test/test_capi/test_opt.py | 18 ++++++++++++++++++ ...5-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst | 1 + Python/optimizer_bytecodes.c | 8 +++++++- Python/optimizer_cases.c.h | 8 +++++++- 4 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0a85b19e06f158..e4c9a463855a69 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2432,6 +2432,24 @@ def testfunc(n): self.assertIn("_POP_TOP_FLOAT", uops) + def test_unary_negative_long_float_type(self): + def testfunc(n): + for _ in range(n): + a = 9397 + f = 9397.0 + x = -a + -a + y = -f + -f + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertNotIn("_GUARD_TOS_INT", uops) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertNotIn("_GUARD_TOS_FLOAT", uops) + self.assertNotIn("_GUARD_NOS_FLOAT", uops) def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst new file mode 100644 index 00000000000000..6a9d9c683f9a8c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst @@ -0,0 +1 @@ +Optimize ``_UNARY_NEGATIVE`` in JIT-compiled code. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8fbaf232ffa2e..f8a0484bdc2b04 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -452,7 +452,13 @@ dummy_func(void) { res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1e581afadc9569..10767ccdbd57f5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -190,7 +190,13 @@ res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } stack_pointer[-1] = res; break; From ceae4edf81ff35c01616c09b3ed8ddd2936a8f3a Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 23 Jun 2025 15:09:40 -0700 Subject: [PATCH 660/989] gh-119786: Add InternalDocs/qsbr.md. (gh-135411) Add internal doc for the Quiescent-State Based Reclamation (QSBR) implementation. --- InternalDocs/README.md | 3 +- InternalDocs/qsbr.md | 129 +++++++++++++++++++++++++++++++++++++++++ Python/qsbr.c | 2 +- 3 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 InternalDocs/qsbr.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index c20aa015c5bb74..6b1d919826402c 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -42,8 +42,9 @@ Program Execution - [Exception Handling](exception_handling.md) +- [Quiescent-State Based Reclamation (QSBR)](qsbr.md) Modules --- -- [asyncio](asyncio.md) \ No newline at end of file +- [asyncio](asyncio.md) diff --git a/InternalDocs/qsbr.md b/InternalDocs/qsbr.md new file mode 100644 index 00000000000000..1c4a79a7b44436 --- /dev/null +++ b/InternalDocs/qsbr.md @@ -0,0 +1,129 @@ +# Quiescent-State Based Reclamation (QSBR) + +## Introduction + +When implementing lock-free data structures, a key challenge is determining +when it is safe to free memory that has been logically removed from a +structure. Freeing memory too early can lead to use-after-free bugs if another +thread is still accessing it. Freeing it too late results in excessive memory +consumption. + +Safe memory reclamation (SMR) schemes address this by delaying the free +operation until all concurrent read accesses are guaranteed to have completed. +Quiescent-State Based Reclamation (QSBR) is a SMR scheme used in Python's +free-threaded build to manage the lifecycle of shared memory. + +QSBR requires threads to periodically report that they are in a quiescent +state. A thread is in a quiescent state if it holds no references to shared +objects that might be reclaimed. Think of it as a checkpoint where a thread +signals, "I am not in the middle of any operation that relies on a shared +resource." In Python, the eval_breaker provides a natural and convenient place +for threads to report this state. + + +## Use in Free-Threaded Python + +While CPython's memory management is dominated by reference counting and a +tracing garbage collector, these mechanisms are not suitable for all data +structures. For example, the backing array of a list object is not individually +reference-counted but may have a shorter lifetime than the `PyListObject` that +contains it. We could delay reclamation until the next GC run, but we want +reclamation to be prompt and to run the GC less frequently in the free-threaded +build, as it requires pausing all threads. + +Many operations in the free-threaded build are protected by locks. However, for +performance-critical code, we want to allow reads to happen concurrently with +updates. For instance, we want to avoid locking during most list read accesses. +If a list is resized while another thread is reading it, QSBR provides the +mechanism to determine when it is safe to free the list's old backing array. + +Specific use cases for QSBR include: + +* Dictionary keys (`PyDictKeysObject`) and list arrays (`_PyListArray`): When a +dictionary or list that may be shared between threads is resized, we use QSBR +to delay freeing the old keys or array until it's safe. For dicts and lists +that are not shared, their storage can be freed immediately upon resize. + +* Mimalloc `mi_page_t`: Non-locking dictionary and list accesses require +cooperation from the memory allocator. If an object is freed and its memory is +reused, we must ensure the new object's reference count field is at the same +memory location. In practice, this means when a mimalloc page (`mi_page_t`) +becomes empty, we don't immediately allow it to be reused for allocations of a +different size class. QSBR is used to determine when it's safe to repurpose the +page or return its memory to the OS. + + +## Implementation Details + + +### Core Implementation + +The proposal to add QSBR to Python is contained in +[Github issue 115103](https://github.com/python/cpython/issues/115103). +Many details of that proposal have been copied here, so they can be kept +up-to-date with the actual implementation. + +Python's QSBR implementation is based on FreeBSD's "Global Unbounded +Sequences." [^1][^2][^3]. It relies on a few key counters: + +* Global Write Sequence (`wr_seq`): A per-interpreter counter, `wr_seq`, is started +at 1 and incremented by 2 each time it is advanced. This ensures its value is +always odd, which can be used to distinguish it from other state values. When +an object needs to be reclaimed, `wr_seq` is advanced, and the object is tagged +with this new sequence number. + +* Per-Thread Read Sequence: Each thread has a local read sequence counter. When +a thread reaches a quiescent state (e.g., at the eval_breaker), it copies the +current global `wr_seq` to its local counter. + +* Global Read Sequence (`rd_seq`): This per-interpreter value stores the minimum +of all per-thread read sequence counters (excluding detached threads). It is +updated by a "polling" operation. + +To free an object, the following steps are taken: + +1. Advance the global `wr_seq`. + +2. Add the object's pointer to a deferred-free list, tagging it with the new + `wr_seq` value as its qsbr_goal. + +Periodically, a polling mechanism processes this deferred-free list: + +1. The minimum read sequence value across all active threads is calculated and + stored as the global `rd_seq`. + +2. For each item on the deferred-free list, if its qsbr_goal is less than or + equal to the new `rd_seq`, its memory is freed, and it is removed from the: + list. Otherwise, it remains on the list for a future attempt. + + +### Deferred Advance Optimization + +To reduce memory contention from frequent updates to the global `wr_seq`, its +advancement is sometimes deferred. Instead of incrementing `wr_seq` on every +reclamation request, each thread tracks its number of deferrals locally. Once +the deferral count reaches a limit (QSBR_DEFERRED_LIMIT, currently 10), the +thread advances the global `wr_seq` and resets its local count. + +When an object is added to the deferred-free list, its qsbr_goal is set to +`wr_seq` + 2. By setting the goal to the next sequence value, we ensure it's safe +to defer the global counter advancement. This optimization improves runtime +speed but may increase peak memory usage by slightly delaying when memory can +be reclaimed. + + +## Limitations + +Determining the `rd_seq` requires scanning over all thread states. This operation +could become a bottleneck in applications with a very large number of threads +(e.g., >1,000). Future work may address this with more advanced mechanisms, +such as a tree-based structure or incremental scanning. For now, the +implementation prioritizes simplicity, with plans for refinement if +multi-threaded benchmarks reveal performance issues. + + +## References + +[^1]: https://youtu.be/ZXUIFj4nRjk?t=694 +[^2]: https://people.kernel.org/joelfernandes/gus-vs-rcu +[^3]: http://bxr.su/FreeBSD/sys/kern/subr_smr.c#44 diff --git a/Python/qsbr.c b/Python/qsbr.c index bf34fb2523dfc8..afa03776c26b6f 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -1,6 +1,6 @@ /* * Implementation of safe memory reclamation scheme using - * quiescent states. + * quiescent states. See InternalDocs/qsbr.md. * * This is derived from the "GUS" safe memory reclamation technique * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs From caad163b691b2343d823541cfbf741f481ee9f3e Mon Sep 17 00:00:00 2001 From: Yongzi Li <204532581+Yzi-Li@users.noreply.github.com> Date: Tue, 24 Jun 2025 06:53:33 +0800 Subject: [PATCH 661/989] Docs: Use `arguments` to replace `args` in `argparse.rst` (GH-135510) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 17f126cc065a82..a03d88092dbf1f 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -955,7 +955,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: .. index:: single: + (plus); in argparse module -* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a +* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a list. Additionally, an error message will be generated if there wasn't at least one command-line argument present. For example:: From 2793b68f758c10fb63b264787f10d46a71fc8086 Mon Sep 17 00:00:00 2001 From: Vincent Poulailleau Date: Tue, 24 Jun 2025 01:36:30 +0200 Subject: [PATCH 662/989] Fix example according to PEP 750 in "What's new in 3.14" (GH-134727) A redundant extra part was written. Added a closing tag, to match the usage in PEP 750. --- Doc/whatsnew/3.14.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f0a87a9ada7bab..8b20e42d7d8e07 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -278,7 +278,7 @@ As another example, generating HTML attributes from data: attributes = {"src": "shrubbery.jpg", "alt": "looks nice"} template = t"" - assert html(template) == 'looks nice' + assert html(template) == 'looks nice' Compared to using an f-string, the ``html`` function has access to template attributes containing the original information: static strings, interpolations, and values From 34393cbdd46fd965de86f1e7bc89ab111f506723 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Tue, 24 Jun 2025 08:55:50 +0800 Subject: [PATCH 663/989] gh-135648: Document that `shutil.copyfileobj` doesn't flush (#135737) Adds a note about flush/close on copyfileobj, and updates the Emscripten build script to follow documented advice. --- Doc/library/shutil.rst | 7 +++++++ Tools/wasm/emscripten/__main__.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index e7c4c4f46bd011..2dde40c9d92f45 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -47,6 +47,13 @@ Directory and files operations 0, only the contents from the current file position to the end of the file will be copied. + :func:`copyfileobj` will *not* guarantee that the destination stream has + been flushed on completion of the copy. If you want to read from the + destination at the completion of the copy operation (for example, reading + the contents of a temporary file that has been copied from a HTTP stream), + you must ensure that you have called :func:`~io.IOBase.flush` or + :func:`~io.IOBase.close` on the file-like object before attempting to read + the destination file. .. function:: copyfile(src, dst, *, follow_symlinks=True) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 849bd5de44eb7b..c0d58aeaadd2cf 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -167,11 +167,12 @@ def make_build_python(context, working_dir): @subdir(HOST_BUILD_DIR, clean_ok=True) def make_emscripten_libffi(context, working_dir): shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True) - with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file: + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp_file: with urlopen( "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz" ) as response: shutil.copyfileobj(response, tmp_file) + tmp_file.close() shutil.unpack_archive(tmp_file.name, working_dir) call( [EMSCRIPTEN_DIR / "make_libffi.sh"], From ef4fc86afa297071cb0eb3db871640fc14694a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:58:07 +0200 Subject: [PATCH 664/989] gh-135532: use `defining_class` for copying BLAKE-2 and SHA-3 objects (#135838) --- Lib/test/test_hashlib.py | 10 ++++++++++ Modules/blake2module.c | 8 +++++--- Modules/clinic/blake2module.c.h | 14 +++++++++----- Modules/clinic/sha3module.c.h | 14 +++++++++----- Modules/sha3module.c | 9 +++++---- 5 files changed, 38 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 6e4e41e4fbb4f1..7b378c45e71563 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -371,6 +371,16 @@ def test_get_builtin_constructor(self): self.assertIs(constructor, _md5.md5) self.assertEqual(sorted(builtin_constructor_cache), ['MD5', 'md5']) + def test_copy(self): + for cons in self.hash_constructors: + h1 = cons(os.urandom(16), usedforsecurity=False) + h2 = h1.copy() + self.assertIs(type(h1), type(h2)) + self.assertEqual(h1.name, h2.name) + size = (16,) if h1.name in self.shakes else () + self.assertEqual(h1.digest(*size), h2.digest(*size)) + self.assertEqual(h1.hexdigest(*size), h2.hexdigest(*size)) + def test_hexdigest(self): for cons in self.hash_constructors: h = cons(usedforsecurity=False) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 6c4349ac06bb8a..9e279e11b518d2 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -787,17 +787,19 @@ blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) /*[clinic input] _blake2.blake2b.copy + cls: defining_class + Return a copy of the hash object. [clinic start generated code]*/ static PyObject * -_blake2_blake2b_copy_impl(Blake2Object *self) -/*[clinic end generated code: output=622d1c56b91c50d8 input=e383c2d199fd8a2e]*/ +_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls) +/*[clinic end generated code: output=5f8ea31c56c52287 input=f38f3475e9aec98d]*/ { int rc; Blake2Object *cpy; - if ((cpy = new_Blake2Object(Py_TYPE(self))) == NULL) { + if ((cpy = new_Blake2Object(cls)) == NULL) { return NULL; } diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index 9e9cd56e569b24..97d010d03a4b23 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -434,15 +434,19 @@ PyDoc_STRVAR(_blake2_blake2b_copy__doc__, "Return a copy of the hash object."); #define _BLAKE2_BLAKE2B_COPY_METHODDEF \ - {"copy", (PyCFunction)_blake2_blake2b_copy, METH_NOARGS, _blake2_blake2b_copy__doc__}, + {"copy", _PyCFunction_CAST(_blake2_blake2b_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _blake2_blake2b_copy__doc__}, static PyObject * -_blake2_blake2b_copy_impl(Blake2Object *self); +_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls); static PyObject * -_blake2_blake2b_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) +_blake2_blake2b_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _blake2_blake2b_copy_impl((Blake2Object *)self); + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { + PyErr_SetString(PyExc_TypeError, "copy() takes no arguments"); + return NULL; + } + return _blake2_blake2b_copy_impl((Blake2Object *)self, cls); } PyDoc_STRVAR(_blake2_blake2b_update__doc__, @@ -502,4 +506,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=60a4abbcb8950fe5 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index d6d44bf4c514c0..1f631ff406e25b 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -100,15 +100,19 @@ PyDoc_STRVAR(_sha3_sha3_224_copy__doc__, "Return a copy of the hash object."); #define _SHA3_SHA3_224_COPY_METHODDEF \ - {"copy", (PyCFunction)_sha3_sha3_224_copy, METH_NOARGS, _sha3_sha3_224_copy__doc__}, + {"copy", _PyCFunction_CAST(_sha3_sha3_224_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sha3_sha3_224_copy__doc__}, static PyObject * -_sha3_sha3_224_copy_impl(SHA3object *self); +_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls); static PyObject * -_sha3_sha3_224_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) +_sha3_sha3_224_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _sha3_sha3_224_copy_impl((SHA3object *)self); + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { + PyErr_SetString(PyExc_TypeError, "copy() takes no arguments"); + return NULL; + } + return _sha3_sha3_224_copy_impl((SHA3object *)self, cls); } PyDoc_STRVAR(_sha3_sha3_224_digest__doc__, @@ -306,4 +310,4 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t narg exit: return return_value; } -/*[clinic end generated code: output=c17e3ec670afe253 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=48be77f8a31e8a3e input=a9049054013a1b77]*/ diff --git a/Modules/sha3module.c b/Modules/sha3module.c index face90a6094beb..5764556bb680f3 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -239,16 +239,17 @@ SHA3_traverse(PyObject *self, visitproc visit, void *arg) /*[clinic input] _sha3.sha3_224.copy + cls: defining_class + Return a copy of the hash object. [clinic start generated code]*/ static PyObject * -_sha3_sha3_224_copy_impl(SHA3object *self) -/*[clinic end generated code: output=6c537411ecdcda4c input=93a44aaebea51ba8]*/ +_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls) +/*[clinic end generated code: output=13958b44c244013e input=7134b4dc0a2fbcac]*/ { SHA3object *newobj; - - if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { + if ((newobj = newSHA3object(cls)) == NULL) { return NULL; } HASHLIB_ACQUIRE_LOCK(self); From 39ea593cbbd800757cbb93e4be3fa79af84e5c8c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:08:23 +0100 Subject: [PATCH 665/989] gh-123299: Update 'What's New in Python 3.14' from `3.14` branch (#135616) --- Doc/whatsnew/3.14.rst | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8b20e42d7d8e07..cbca720b75e96c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1757,6 +1757,16 @@ os (Contributed by Cody Maloney in :gh:`129205`.) +os.path +------- + +* The *strict* parameter to :func:`os.path.realpath` accepts a new value, + :data:`os.path.ALLOW_MISSING`. + If used, errors other than :exc:`FileNotFoundError` will be re-raised; + the resulting path can be missing but it will be free of symlinks. + (Contributed by Petr Viktorin for :cve:`2025-4517`.) + + pathlib ------- @@ -1945,6 +1955,28 @@ sysconfig (Contributed by Xuehai Pan in :gh:`131799`.) +tarfile +------- + +* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to + avoid path traversal attacks. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.) +* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes + when a directory was removed or replaced by another kind of file. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now (re-)apply the extraction filter when substituting a link (hard or + symbolic) with a copy of another archive member, and when fixing up + directory attributes. + The former raises a new exception, :exc:`~tarfile.LinkFallbackError`. + (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + no longer extract rejected members when + :func:`~tarfile.TarFile.errorlevel` is zero. + (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` + and :cve:`2025-4435`.) + + threading --------- @@ -2700,6 +2732,7 @@ New features * :c:func:`PyUnicodeWriter_Discard` * :c:func:`PyUnicodeWriter_Finish` * :c:func:`PyUnicodeWriter_Format` + * :c:func:`PyUnicodeWriter_WriteASCII` * :c:func:`PyUnicodeWriter_WriteChar` * :c:func:`PyUnicodeWriter_WriteRepr` * :c:func:`PyUnicodeWriter_WriteStr` @@ -2976,7 +3009,7 @@ Deprecated :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) `. * :c:func:`!_PyUnicodeWriter_WriteASCIIString`: replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. + :c:func:`PyUnicodeWriter_WriteASCII(writer, str) `. * :c:func:`!_PyUnicodeWriter_WriteLatin1String`: replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. From 2060089254f0b00199b99dd1ae83a3fb139e890c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 24 Jun 2025 09:49:38 +0100 Subject: [PATCH 666/989] GH-135106: Restrict trashcan to GC'ed objects (GH-135682) --- ...-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst | 2 + Objects/object.c | 76 ++++++------------- 2 files changed, 26 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst new file mode 100644 index 00000000000000..b6e953a7719be1 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst @@ -0,0 +1,2 @@ +Restrict the trashcan mechanism to GC'ed objects and untrack them while in +the trashcan to prevent the GC and trashcan mechanisms conflicting. diff --git a/Objects/object.c b/Objects/object.c index 6a581a19604f9d..4d60128b092c22 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -3034,57 +3034,28 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -#ifndef Py_GIL_DISABLED -/* We need to store a pointer in the refcount field of - * an object. It is important that we never store 0 (NULL). -* It is also important to not make the object appear immortal, -* or it might be untracked by the cycle GC. */ -static uintptr_t -pointer_to_safe_refcount(void *ptr) -{ - uintptr_t full = (uintptr_t)ptr; - assert((full & 3) == 0); -#if SIZEOF_VOID_P > 4 - uint32_t refcnt = (uint32_t)full; - if (refcnt >= (uint32_t)_Py_IMMORTAL_MINIMUM_REFCNT) { - full = full - ((uintptr_t)_Py_IMMORTAL_MINIMUM_REFCNT) + 1; - } - return full + 2; -#else - // Make the top two bits 0, so it appears mortal. - return (full >> 2) + 1; -#endif -} - -static void * -safe_refcount_to_pointer(uintptr_t refcnt) -{ -#if SIZEOF_VOID_P > 4 - if (refcnt & 1) { - refcnt += _Py_IMMORTAL_MINIMUM_REFCNT - 1; - } - return (void *)(refcnt - 2); -#else - return (void *)((refcnt -1) << 2); -#endif -} -#endif - /* Add op to the gcstate->trash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. + * call-stack depth gets large. op must be a gc'ed object, with refcount 0. + * Py_DECREF must already have been called on it. */ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, Py_REFCNT(op) == 0); + PyTypeObject *tp = Py_TYPE(op); + assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC); + int tracked = 0; + if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) { + tracked = _PyObject_GC_IS_TRACKED(op); + if (tracked) { + _PyObject_GC_UNTRACK(op); + } + } + uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked; #ifdef Py_GIL_DISABLED - op->ob_tid = (uintptr_t)tstate->delete_later; + op->ob_tid = tagged_ptr; #else - /* Store the delete_later pointer in the refcnt field. */ - uintptr_t refcnt = pointer_to_safe_refcount(tstate->delete_later); - *((uintptr_t*)op) = refcnt; - assert(!_Py_IsImmortal(op)); + _Py_AS_GC(op)->_gc_next = tagged_ptr; #endif tstate->delete_later = op; } @@ -3099,17 +3070,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - tstate->delete_later = (PyObject*) op->ob_tid; + uintptr_t tagged_ptr = op->ob_tid; op->ob_tid = 0; _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED); #else - /* Get the delete_later pointer from the refcnt field. - * See _PyTrash_thread_deposit_object(). */ - uintptr_t refcnt = *((uintptr_t*)op); - tstate->delete_later = safe_refcount_to_pointer(refcnt); - op->ob_refcnt = 0; + uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next; + _Py_AS_GC(op)->_gc_next = 0; #endif - + tstate->delete_later = (PyObject *)(tagged_ptr & ~1); + if (tagged_ptr & 1) { + _PyObject_GC_TRACK(op); + } /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but * Py_DECREF was already called on this object, and in @@ -3183,10 +3154,11 @@ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); + unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC; destructor dealloc = type->tp_dealloc; PyThreadState *tstate = _PyThreadState_GET(); intptr_t margin = _Py_RecursionLimit_GetMargin(tstate); - if (margin < 2) { + if (margin < 2 && gc_flag) { _PyTrash_thread_deposit_object(tstate, (PyObject *)op); return; } @@ -3232,7 +3204,7 @@ _Py_Dealloc(PyObject *op) Py_XDECREF(old_exc); Py_DECREF(type); #endif - if (tstate->delete_later && margin >= 4) { + if (tstate->delete_later && margin >= 4 && gc_flag) { _PyTrash_thread_destroy_chain(tstate); } } From 15c6d63fe6fc62c6d78d2fad81965a8e6f7b7b98 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 24 Jun 2025 12:21:35 +0200 Subject: [PATCH 667/989] gh-135494: Fix python -m test --pgo -x test_re (#135713) Fix regrtest to support excluding tests from --pgo tests. --- Lib/test/libregrtest/main.py | 16 ++++++++++------ Lib/test/test_regrtest.py | 11 +++++++++++ ...025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst | 2 ++ 3 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 0d9c059a93872d..a2d01b157ac89b 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -190,6 +190,12 @@ def find_tests(self, tests: TestList | None = None) -> tuple[TestTuple, TestList strip_py_suffix(tests) + exclude_tests = set() + if self.exclude: + for arg in self.cmdline_args: + exclude_tests.add(arg) + self.cmdline_args = [] + if self.pgo: # add default PGO tests if no tests are specified setup_pgo_tests(self.cmdline_args, self.pgo_extended) @@ -200,17 +206,15 @@ def find_tests(self, tests: TestList | None = None) -> tuple[TestTuple, TestList if self.tsan_parallel: setup_tsan_parallel_tests(self.cmdline_args) - exclude_tests = set() - if self.exclude: - for arg in self.cmdline_args: - exclude_tests.add(arg) - self.cmdline_args = [] - alltests = findtests(testdir=self.test_dir, exclude=exclude_tests) if not self.fromfile: selected = tests or self.cmdline_args + if exclude_tests: + # Support "--pgo/--tsan -x test_xxx" command + selected = [name for name in selected + if name not in exclude_tests] if selected: selected = split_test_packages(selected) else: diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index a43d2678ebd3be..5bc3c5924b07fb 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -2346,6 +2346,17 @@ def check(output): output = self.run_tests('-j1', '-v', testname, env=env, isolated=False) check(output) + def test_pgo_exclude(self): + # Get PGO tests + output = self.run_tests('--pgo', '--list-tests') + pgo_tests = output.strip().split() + + # Exclude test_re + output = self.run_tests('--pgo', '--list-tests', '-x', 'test_re') + tests = output.strip().split() + self.assertNotIn('test_re', tests) + self.assertEqual(len(tests), len(pgo_tests) - 1) + class TestUtils(unittest.TestCase): def test_format_duration(self): diff --git a/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst new file mode 100644 index 00000000000000..832d1fe033e229 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst @@ -0,0 +1,2 @@ +Fix regrtest to support excluding tests from ``--pgo`` tests. Patch by +Victor Stinner. From e5f03b94b6d4decbf433d385f692c1b8d9b7e88d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 24 Jun 2025 13:09:46 +0200 Subject: [PATCH 668/989] gh-135487: fix `reprlib.Repr.repr_int` when given very large integers (#135506) --- Lib/reprlib.py | 17 +++++++- Lib/test/test_reprlib.py | 40 +++++++++++++++---- ...-06-14-12-06-55.gh-issue-135487.KdVFff.rst | 2 + 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst diff --git a/Lib/reprlib.py b/Lib/reprlib.py index 441d1be4bdede2..ab18247682b69a 100644 --- a/Lib/reprlib.py +++ b/Lib/reprlib.py @@ -181,7 +181,22 @@ def repr_str(self, x, level): return s def repr_int(self, x, level): - s = builtins.repr(x) # XXX Hope this isn't too slow... + try: + s = builtins.repr(x) + except ValueError as exc: + assert 'sys.set_int_max_str_digits()' in str(exc) + # Those imports must be deferred due to Python's build system + # where the reprlib module is imported before the math module. + import math, sys + # Integers with more than sys.get_int_max_str_digits() digits + # are rendered differently as their repr() raises a ValueError. + # See https://github.com/python/cpython/issues/135487. + k = 1 + int(math.log10(abs(x))) + # Note: math.log10(abs(x)) may be overestimated or underestimated, + # but for simplicity, we do not compute the exact number of digits. + max_digits = sys.get_int_max_str_digits() + return (f'<{x.__class__.__name__} instance with roughly {k} ' + f'digits (limit at {max_digits}) at 0x{id(x):x}>') if len(s) > self.maxlong: i = max(0, (self.maxlong-3)//2) j = max(0, self.maxlong-3-i) diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 16623654c29b28..d5631efcdb75b7 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -151,14 +151,38 @@ def test_frozenset(self): eq(r(frozenset({1, 2, 3, 4, 5, 6, 7})), "frozenset({1, 2, 3, 4, 5, 6, ...})") def test_numbers(self): - eq = self.assertEqual - eq(r(123), repr(123)) - eq(r(123), repr(123)) - eq(r(1.0/3), repr(1.0/3)) - - n = 10**100 - expected = repr(n)[:18] + "..." + repr(n)[-19:] - eq(r(n), expected) + for x in [123, 1.0 / 3]: + self.assertEqual(r(x), repr(x)) + + max_digits = sys.get_int_max_str_digits() + for k in [100, max_digits - 1]: + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + expected = repr(n)[:18] + "..." + repr(n)[-19:] + self.assertEqual(r(n), expected) + + def re_msg(n, d): + return (rf'<{n.__class__.__name__} instance with roughly {d} ' + rf'digits \(limit at {max_digits}\) at 0x[a-f0-9]+>') + + k = max_digits + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + + for k in [max_digits + 1, 2 * max_digits]: + self.assertGreater(k, 100) + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + with self.subTest(f'10 ** {k} - 1', k=k): + n = 10 ** k - 1 + # Here, since math.log10(n) == math.log10(n-1), + # the number of digits of n - 1 is overestimated. + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) def test_instance(self): eq = self.assertEqual diff --git a/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst new file mode 100644 index 00000000000000..3ef51fa31dfac9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst @@ -0,0 +1,2 @@ +Fix :meth:`!reprlib.Repr.repr_int` when given integers with more than +:func:`sys.get_int_max_str_digits` digits. Patch by Bénédikt Tran. From b3ab94acd308591bbdf264f1722fedc7ee25d6fa Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 24 Jun 2025 19:33:25 +0300 Subject: [PATCH 669/989] gh-135878: Fix crash in `types.SimpleNamespace.__repr__` (#135889) Co-authored-by: Peter Bierma --- .../Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst | 3 +++ Objects/namespaceobject.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst diff --git a/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst new file mode 100644 index 00000000000000..969cf2dfa40878 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst @@ -0,0 +1,3 @@ +Fixes a crash of :class:`types.SimpleNamespace` on :term:`free threading` builds, +when several threads were calling its :meth:`~object.__repr__` method at the +same time. diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index caebe6bf543567..0fc2bcea4cb06e 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -124,9 +124,10 @@ namespace_repr(PyObject *ns) if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; - value = PyDict_GetItemWithError(d, key); - if (value != NULL) { + int has_key = PyDict_GetItemRef(d, key, &value); + if (has_key == 1) { item = PyUnicode_FromFormat("%U=%R", key, value); + Py_DECREF(value); if (item == NULL) { loop_error = 1; } @@ -135,7 +136,7 @@ namespace_repr(PyObject *ns) Py_DECREF(item); } } - else if (PyErr_Occurred()) { + else if (has_key < 0) { loop_error = 1; } } From fea5ccc55d8486300beb1d0254da030a4da10394 Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 24 Jun 2025 10:02:50 -0700 Subject: [PATCH 670/989] gh-135805: Document the X option and env var for controlling thread-local bytecode (#135868) Document the X option and env var for controlling thread-local bytecode. --- Doc/using/cmdline.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 40a46a62031ef7..a5867b489e0053 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -669,6 +669,13 @@ Miscellaneous options .. versionadded:: 3.14 + * :samp:`-X tlbc={0,1}` enables (1, the default) or disables (0) thread-local + bytecode in builds configured with :option:`--disable-gil`. When disabled, + this also disables the specializing interpreter. See also + :envvar:`PYTHON_TLBC`. + + .. versionadded:: 3.14 + It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -1302,6 +1309,16 @@ conflict. .. versionadded:: 3.13 +.. envvar:: PYTHON_TLBC + + If set to ``1`` enables thread-local bytecode. If set to ``0`` thread-local + bytecode and the specializing interpreter are disabled. Only applies to + builds configured with :option:`--disable-gil`. + + See also the :option:`-X tlbc <-X>` command-line option. + + .. versionadded:: 3.14 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ From 4e6f0d116e9725efb2d8eb6c3274aa3c024a6fb4 Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Tue, 24 Jun 2025 14:53:14 -0400 Subject: [PATCH 671/989] gh-135855: Raise TypeError When Passing Non-dict Object to `_interpreters.set___main___attrs` (gh-135856) --- Lib/test/test__interpreters.py | 15 +++++++++++++ ...-06-23-10-19-11.gh-issue-135855.-J0AGF.rst | 3 +++ Modules/_interpretersmodule.c | 22 +++++++++---------- Python/crossinterp.c | 1 + 4 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index ad3ebbfdff64a7..a32d5d81d2bf2d 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -485,6 +485,21 @@ def test_signatures(self): msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_func(self.id, lambda: None, shared=1) + # See https://github.com/python/cpython/issues/135855 + msg = r'_interpreters.set___main___attrs\(\) argument 2 must be dict, not int' + with self.assertRaisesRegex(TypeError, msg): + _interpreters.set___main___attrs(self.id, 1) + + def test_invalid_shared_none(self): + msg = r'must be dict, not None' + with self.assertRaisesRegex(TypeError, msg): + _interpreters.exec(self.id, 'a', shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.run_string(self.id, 'a', shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.run_func(self.id, lambda: None, shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.set___main___attrs(self.id, None) def test_invalid_shared_encoding(self): # See https://github.com/python/cpython/issues/127196 diff --git a/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst new file mode 100644 index 00000000000000..fcf495bdceb168 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst @@ -0,0 +1,3 @@ +Raise :exc:`TypeError` instead of :exc:`SystemError` when +:func:`!_interpreters.set___main___attrs` is passed a non-dict object. +Patch by Brian Schubert. diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index e201ccd53daf8a..b920c32474f4e6 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1039,8 +1039,8 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) PyObject *id, *updates; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$p:" MODULE_NAME_STR ".set___main___attrs", - kwlist, &id, &updates, &restricted)) + "OO!|$p:" MODULE_NAME_STR ".set___main___attrs", + kwlist, &id, &PyDict_Type, &updates, &restricted)) { return NULL; } @@ -1054,16 +1054,14 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } // Check the updates. - if (updates != Py_None) { - Py_ssize_t size = PyObject_Size(updates); - if (size < 0) { - return NULL; - } - if (size == 0) { - PyErr_SetString(PyExc_ValueError, - "arg 2 must be a non-empty mapping"); - return NULL; - } + Py_ssize_t size = PyDict_Size(updates); + if (size < 0) { + return NULL; + } + if (size == 0) { + PyErr_SetString(PyExc_ValueError, + "arg 2 must be a non-empty dict"); + return NULL; } _PyXI_session *session = _PyXI_NewSession(); diff --git a/Python/crossinterp.c b/Python/crossinterp.c index c44bcd559000b3..16a23f0351cd26 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -2617,6 +2617,7 @@ _PyXI_Enter(_PyXI_session *session, // Convert the attrs for cross-interpreter use. _PyXI_namespace *sharedns = NULL; if (nsupdates != NULL) { + assert(PyDict_Check(nsupdates)); Py_ssize_t len = PyDict_Size(nsupdates); if (len < 0) { if (result != NULL) { From ee0e22c088ce5483d4bacd5457d4feb5886bb58a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 24 Jun 2025 14:41:41 -0700 Subject: [PATCH 672/989] GH-90117: Check for list and tuple before MappingView in pprint (GH-135779) --- Lib/pprint.py | 68 +++++++++---------- ...5-06-20-17-06-59.gh-issue-90117.GYWVrn.rst | 1 + 2 files changed, 35 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst diff --git a/Lib/pprint.py b/Lib/pprint.py index 1e611481b51ac0..92a2c543ac279c 100644 --- a/Lib/pprint.py +++ b/Lib/pprint.py @@ -653,6 +653,40 @@ def _safe_repr(self, object, context, maxlevels, level): del context[objid] return "{%s}" % ", ".join(components), readable, recursive + if (issubclass(typ, list) and r is list.__repr__) or \ + (issubclass(typ, tuple) and r is tuple.__repr__): + if issubclass(typ, list): + if not object: + return "[]", True, False + format = "[%s]" + elif len(object) == 1: + format = "(%s,)" + else: + if not object: + return "()", True, False + format = "(%s)" + objid = id(object) + if maxlevels and level >= maxlevels: + return format % "...", False, objid in context + if objid in context: + return _recursion(object), False, True + context[objid] = 1 + readable = True + recursive = False + components = [] + append = components.append + level += 1 + for o in object: + orepr, oreadable, orecur = self.format( + o, context, maxlevels, level) + append(orepr) + if not oreadable: + readable = False + if orecur: + recursive = True + del context[objid] + return format % ", ".join(components), readable, recursive + if issubclass(typ, _collections.abc.MappingView) and r in self._view_reprs: objid = id(object) if maxlevels and level >= maxlevels: @@ -689,40 +723,6 @@ def _safe_repr(self, object, context, maxlevels, level): del context[objid] return typ.__name__ + '([%s])' % ", ".join(components), readable, recursive - if (issubclass(typ, list) and r is list.__repr__) or \ - (issubclass(typ, tuple) and r is tuple.__repr__): - if issubclass(typ, list): - if not object: - return "[]", True, False - format = "[%s]" - elif len(object) == 1: - format = "(%s,)" - else: - if not object: - return "()", True, False - format = "(%s)" - objid = id(object) - if maxlevels and level >= maxlevels: - return format % "...", False, objid in context - if objid in context: - return _recursion(object), False, True - context[objid] = 1 - readable = True - recursive = False - components = [] - append = components.append - level += 1 - for o in object: - orepr, oreadable, orecur = self.format( - o, context, maxlevels, level) - append(orepr) - if not oreadable: - readable = False - if orecur: - recursive = True - del context[objid] - return format % ", ".join(components), readable, recursive - rep = repr(object) return rep, (rep and not rep.startswith('<')), False diff --git a/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst new file mode 100644 index 00000000000000..2bb15cb6d9c61c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst @@ -0,0 +1 @@ +Speed up :mod:`pprint` for :class:`list` and :class:`tuple`. From 18d32fb646b804b26c80910b2c65938e81e2cced Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Wed, 25 Jun 2025 06:42:38 +0100 Subject: [PATCH 673/989] gh-91555: Revert disabling of logger while handling log record. (GH-135858) Revert "gh-91555: disable logger while handling log record (GH-131812)" This reverts commit 2561e148ec985755baa3984b91fd0bfc089b283c. --- Lib/logging/__init__.py | 28 ++----- Lib/test/test_logging.py | 83 ------------------- ...5-03-30-16-42-38.gh-issue-91555.ShVtwW.rst | 2 - 3 files changed, 8 insertions(+), 105 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 5c3c44249345f3..c5860d53b1bdff 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1475,8 +1475,6 @@ class Logger(Filterer): level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels. There is no arbitrary limit to the depth of nesting. """ - _tls = threading.local() - def __init__(self, name, level=NOTSET): """ Initialize the logger with a name and an optional level. @@ -1673,19 +1671,14 @@ def handle(self, record): This method is used for unpickled records received from a socket, as well as those created locally. Logger-level filtering is applied. """ - if self._is_disabled(): + if self.disabled: return - - self._tls.in_progress = True - try: - maybe_record = self.filter(record) - if not maybe_record: - return - if isinstance(maybe_record, LogRecord): - record = maybe_record - self.callHandlers(record) - finally: - self._tls.in_progress = False + maybe_record = self.filter(record) + if not maybe_record: + return + if isinstance(maybe_record, LogRecord): + record = maybe_record + self.callHandlers(record) def addHandler(self, hdlr): """ @@ -1773,7 +1766,7 @@ def isEnabledFor(self, level): """ Is this logger enabled for level 'level'? """ - if self._is_disabled(): + if self.disabled: return False try: @@ -1823,11 +1816,6 @@ def _hierlevel(logger): if isinstance(item, Logger) and item.parent is self and _hierlevel(item) == 1 + _hierlevel(item.parent)) - def _is_disabled(self): - # We need to use getattr as it will only be set the first time a log - # message is recorded on any given thread - return self.disabled or getattr(self._tls, 'in_progress', False) - def __repr__(self): level = getLevelName(self.getEffectiveLevel()) return '<%s %s (%s)>' % (self.__class__.__name__, self.name, level) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index e672dfcbb465cc..3819965ed2cbf9 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -4214,89 +4214,6 @@ def __init__(self, *args, **kwargs): handler = logging.getHandlerByName('custom') self.assertEqual(handler.custom_kwargs, custom_kwargs) - # See gh-91555 and gh-90321 - @support.requires_subprocess() - def test_deadlock_in_queue(self): - queue = multiprocessing.Queue() - handler = logging.handlers.QueueHandler(queue) - logger = multiprocessing.get_logger() - level = logger.level - try: - logger.setLevel(logging.DEBUG) - logger.addHandler(handler) - logger.debug("deadlock") - finally: - logger.setLevel(level) - logger.removeHandler(handler) - - def test_recursion_in_custom_handler(self): - class BadHandler(logging.Handler): - def __init__(self): - super().__init__() - def emit(self, record): - logger.debug("recurse") - logger = logging.getLogger("test_recursion_in_custom_handler") - logger.addHandler(BadHandler()) - logger.setLevel(logging.DEBUG) - logger.debug("boom") - - @threading_helper.requires_working_threading() - def test_thread_supression_noninterference(self): - lock = threading.Lock() - logger = logging.getLogger("test_thread_supression_noninterference") - - # Block on the first call, allow others through - # - # NOTE: We need to bypass the base class's lock, otherwise that will - # block multiple calls to the same handler itself. - class BlockOnceHandler(TestHandler): - def __init__(self, barrier): - super().__init__(support.Matcher()) - self.barrier = barrier - - def createLock(self): - self.lock = None - - def handle(self, record): - self.emit(record) - - def emit(self, record): - if self.barrier: - barrier = self.barrier - self.barrier = None - barrier.wait() - with lock: - pass - super().emit(record) - logger.info("blow up if not supressed") - - barrier = threading.Barrier(2) - handler = BlockOnceHandler(barrier) - logger.addHandler(handler) - logger.setLevel(logging.DEBUG) - - t1 = threading.Thread(target=logger.debug, args=("1",)) - with lock: - - # Ensure first thread is blocked in the handler, hence supressing logging... - t1.start() - barrier.wait() - - # ...but the second thread should still be able to log... - t2 = threading.Thread(target=logger.debug, args=("2",)) - t2.start() - t2.join(timeout=3) - - self.assertEqual(len(handler.buffer), 1) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='2')) - - # The first thread should still be blocked here - self.assertTrue(t1.is_alive()) - - # Now the lock has been released the first thread should complete - t1.join() - self.assertEqual(len(handler.buffer), 2) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='1')) class ManagerTest(BaseTest): def test_manager_loggerclass(self): diff --git a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst b/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst deleted file mode 100644 index e8f5ba56fcc23d..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst +++ /dev/null @@ -1,2 +0,0 @@ -Ignore log messages generated during handling of log messages, to avoid -deadlock or infinite recursion. From 113de8545ffe74a4a1dddb9351fa1cbd3562b621 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Jun 2025 00:06:32 -0700 Subject: [PATCH 674/989] GH-133136: Revise QSBR to reduce excess memory held (gh-135473) The free threading build uses QSBR to delay the freeing of dictionary keys and list arrays when the objects are accessed by multiple threads in order to allow concurrent reads to proceed with holding the object lock. The requests are processed in batches to reduce execution overhead, but for large memory blocks this can lead to excess memory usage. Take into account the size of the memory block when deciding when to process QSBR requests. Also track the amount of memory being held by QSBR for mimalloc pages. Advance the write sequence if this memory exceeds a limit. Advancing the sequence will allow it to be freed more quickly. Process the held QSBR items from the "eval breaker", rather than from `_PyMem_FreeDelayed()`. This gives a higher chance that the global read sequence has advanced enough so that items can be freed. Co-authored-by: Sam Gross --- Include/internal/pycore_pymem.h | 2 +- Include/internal/pycore_qsbr.h | 31 ++++-- ...-06-03-21-06-22.gh-issue-133136.Usnvri.rst | 2 + Objects/codeobject.c | 2 +- Objects/dictobject.c | 4 +- Objects/listobject.c | 3 +- Objects/obmalloc.c | 96 +++++++++++++++++-- Python/ceval_gil.c | 4 + Python/qsbr.c | 12 +-- 9 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e669a30b072d18..f3f2ae0a140828 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Periodically process delayed free requests. extern void _PyMem_ProcessDelayed(PyThreadState *tstate); diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..1f9b3fcf777493 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -48,8 +48,21 @@ struct _qsbr_thread_state { // Thread state (or NULL) PyThreadState *tstate; - // Used to defer advancing write sequence a fixed number of times - int deferrals; + // Number of held items added by this thread since the last write sequence + // advance + int deferred_count; + + // Estimate for the amount of memory that is held by this thread since + // the last write sequence advance + size_t deferred_memory; + + // Amount of memory in mimalloc pages deferred from collection. When + // deferred, they are prevented from being used for a different size class + // and in a different thread. + size_t deferred_page_memory; + + // True if the deferred memory frees should be processed. + bool should_process; // Is this thread state allocated? bool allocated; @@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) extern uint64_t _Py_qsbr_advance(struct _qsbr_shared *shared); -// Batches requests to advance the write sequence. This advances the write -// sequence every N calls, which reduces overhead but increases time to -// reclamation. Returns the new goal. +// Return the next value for the write sequence (current plus the increment). extern uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); +_Py_qsbr_shared_next(struct _qsbr_shared *shared); + +// Return true if deferred memory frees held by QSBR should be processed to +// determine if they can be safely freed. +static inline bool +_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr) +{ + return qsbr->should_process; +} // Have the read sequences advanced to the given goal? If this returns true, // it safe to reclaim any memory tagged with the goal (or earlier goal). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000000..a9501c13c95b3a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3f53d4cfeb20ac..91772bc9d19098 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3369,7 +3369,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 72901377cbb3da..6b7b150f0e2f2c 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d4b8327cb73b37..deb7fd957e57dd 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. +#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); } } #endif @@ -1317,6 +1399,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 57d8f68b000b60..aa68371ac8febf 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1387,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/qsbr.c b/Python/qsbr.c index afa03776c26b6f..c992c285cb13e4 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -41,10 +41,6 @@ // Starting size of the array of qsbr thread states #define MIN_ARRAY_SIZE 8 -// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing -// the write sequence. -#define QSBR_DEFERRED_LIMIT 10 - // Allocate a QSBR thread state from the freelist static struct _qsbr_thread_state * qsbr_allocate(struct _qsbr_shared *shared) @@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared) } uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +_Py_qsbr_shared_next(struct _qsbr_shared *shared) { - if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { - return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; - } - qsbr->deferrals = 0; - return _Py_qsbr_advance(qsbr->shared); + return _Py_qsbr_shared_current(shared) + QSBR_INCR; } static uint64_t From ca87a47b3d92aabaefbbe79c0493d66602184b41 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 25 Jun 2025 04:44:55 -0400 Subject: [PATCH 675/989] gh-135755: Docs: C API: Document missing `PyFunction_GET*` macros (GH-135762) Co-authored-by: Brian Schubert Co-authored-by: Petr Viktorin --- Doc/c-api/function.rst | 20 ++++++++++++++++++++ Doc/data/refcounts.dat | 24 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 63b78f677674e9..5fb8567ef8c95f 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -95,6 +95,13 @@ There are a few functions specific to Python functions. .. versionadded:: 3.12 + +.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op) + + Return the keyword-only argument default values of the function object *op*. This can be a + dictionary of arguments or ``NULL``. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -123,6 +130,19 @@ There are a few functions specific to Python functions. Raises :exc:`SystemError` and returns ``-1`` on failure. +.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op) + PyObject *PyFunction_GET_GLOBALS(PyObject *op) + PyObject *PyFunction_GET_MODULE(PyObject *op) + PyObject *PyFunction_GET_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_CLOSURE(PyObject *op) + PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op) + + These functions are similar to their ``PyFunction_Get*`` counterparts, but + do not do type checking. Passing anything other than an instance of + :c:data:`PyFunction_Type` is undefined behavior. + + .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) Register *callback* as a function watcher for the current interpreter. diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 99cc823c0c3772..144c5608e07426 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0: PyFunction_GetAnnotations:PyObject*::0: PyFunction_GetAnnotations:PyObject*:op:0: +PyFunction_GET_ANNOTATIONS:PyObject*::0: +PyFunction_GET_ANNOTATIONS:PyObject*:op:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: +PyFunction_GET_CLOSURE:PyObject*::0: +PyFunction_GET_CLOSURE:PyObject*:op:0: + PyFunction_GetCode:PyObject*::0: PyFunction_GetCode:PyObject*:op:0: +PyFunction_GET_CODE:PyObject*::0: +PyFunction_GET_CODE:PyObject*:op:0: + PyFunction_GetDefaults:PyObject*::0: PyFunction_GetDefaults:PyObject*:op:0: +PyFunction_GET_DEFAULTS:PyObject*::0: +PyFunction_GET_DEFAULTS:PyObject*:op:0: + +PyFunction_GetKwDefaults:PyObject*::0: +PyFunction_GetKwDefaults:PyObject*:op:0: + +PyFunction_GET_KW_DEFAULTS:PyObject*::0: +PyFunction_GET_KW_DEFAULTS:PyObject*:op:0: + PyFunction_GetGlobals:PyObject*::0: PyFunction_GetGlobals:PyObject*:op:0: +PyFunction_GET_GLOBALS:PyObject*::0: +PyFunction_GET_GLOBALS:PyObject*:op:0: + PyFunction_GetModule:PyObject*::0: PyFunction_GetModule:PyObject*:op:0: +PyFunction_GET_MODULE:PyObject*::0: +PyFunction_GET_MODULE:PyObject*:op:0: + PyFunction_New:PyObject*::+1: PyFunction_New:PyObject*:code:+1: PyFunction_New:PyObject*:globals:+1: From dd59c786cfb1018eb5abe877bfa7265ea9a3c2b9 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 25 Jun 2025 11:54:42 +0300 Subject: [PATCH 676/989] gh-135839: Fix `module_traverse` and `module_clear` in `_interpchannelsmodule` (#135840) --- Modules/_interpchannelsmodule.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index ee5e2b005e0a5b..9c1f8615161275 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3614,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -3625,8 +3624,7 @@ module_clear(PyObject *mod) assert(state != NULL); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void From 1b1ae82fab0093924b8b89eb1c7eea11d7be7b18 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 25 Jun 2025 13:03:05 +0200 Subject: [PATCH 677/989] gh-135755: Move SPECIAL_ constants to a private header (GH-135922) Macros without a `Py`/`_Py` prefix should not be defined in public headers. --- Include/ceval.h | 7 ------- Include/internal/pycore_ceval.h | 7 +++++++ Modules/_opcode.c | 2 +- Python/ceval.c | 2 +- Python/codegen.c | 1 + 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 32ab38972e548f..e9df8684996e23 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); #define FVS_MASK 0x4 #define FVS_HAVE_SPEC 0x4 -/* Special methods used by LOAD_SPECIAL */ -#define SPECIAL___ENTER__ 0 -#define SPECIAL___EXIT__ 1 -#define SPECIAL___AENTER__ 2 -#define SPECIAL___AEXIT__ 3 -#define SPECIAL_MAX 3 - #ifndef Py_LIMITED_API # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 18623cc8f1c945..cc2defbdf77821 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -366,6 +366,13 @@ extern int _PyRunRemoteDebugger(PyThreadState *tstate); PyAPI_FUNC(_PyStackRef) _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef *index_ptr); +/* Special methods used by LOAD_SPECIAL */ +#define SPECIAL___ENTER__ 0 +#define SPECIAL___EXIT__ 1 +#define SPECIAL___AENTER__ 2 +#define SPECIAL___AEXIT__ 3 +#define SPECIAL_MAX 3 + #ifdef __cplusplus } #endif diff --git a/Modules/_opcode.c b/Modules/_opcode.c index c295f7b3152577..ef271b6ef566b9 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -5,7 +5,7 @@ #include "Python.h" #include "compile.h" #include "opcode.h" -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL_MAX #include "pycore_code.h" #include "pycore_compile.h" #include "pycore_intrinsics.h" diff --git a/Python/ceval.c b/Python/ceval.c index 3da6f61f5acde5..d1de4875656fd3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS diff --git a/Python/codegen.c b/Python/codegen.c index 0023d72cd5e91d..27fe8e1957b2fe 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,7 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed From c2f2fd4eca3b342e781355f9b9e716827fc81a5b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 25 Jun 2025 13:51:32 +0200 Subject: [PATCH 678/989] gh-131591: Make --without-remote-debug work (GH-135925) The feature is checked using `defined(Py_REMOTE_DEBUG)`; defining the macro (even as `0`) enables it. --- configure | 3 --- configure.ac | 2 -- 2 files changed, 5 deletions(-) diff --git a/configure b/configure index 8334e247c3e1bb..50223d81cd976e 100755 --- a/configure +++ b/configure @@ -29933,9 +29933,6 @@ printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else - -printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi diff --git a/configure.ac b/configure.ac index 82fb2d8c492d48..f58e16c774f4aa 100644 --- a/configure.ac +++ b/configure.ac @@ -7168,8 +7168,6 @@ if test "$with_remote_debug" = yes; then [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([yes]) else - AC_DEFINE([Py_REMOTE_DEBUG], [0], - [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([no]) fi From bcc2cbaa7f112323939e853ed69fd82f19568ccf Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 25 Jun 2025 17:17:02 +0300 Subject: [PATCH 679/989] gh-135839: Fix `module_traverse` and `module_clear` in subinterp modules (#135937) --- Modules/_interpqueuesmodule.c | 6 ++---- Modules/_interpretersmodule.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index de06b8b41fe585..e5afe746f90bdc 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1952,8 +1952,7 @@ static int module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1962,8 +1961,7 @@ module_clear(PyObject *mod) module_state *state = get_module_state(mod); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index b920c32474f4e6..e7feaa7f186aee 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1706,8 +1706,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1715,8 +1714,7 @@ module_clear(PyObject *mod) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void From d2154912b3b10823c138e904e74f2a1e7e7ca96c Mon Sep 17 00:00:00 2001 From: HarryLHW <123lhw321@gmail.com> Date: Wed, 25 Jun 2025 22:24:58 +0800 Subject: [PATCH 680/989] Docs: Add cross-reference for `positional_item` in the `calls` productionlist (GH-129977) Add missing hyperlink for `positional_item` --- Doc/reference/expressions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 429b3cd1f006a2..17f39aaf5f57cd 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1023,7 +1023,7 @@ series of :term:`arguments `: : ["," `keywords_arguments`] : | `starred_and_keywords` ["," `keywords_arguments`] : | `keywords_arguments` - positional_arguments: positional_item ("," positional_item)* + positional_arguments: `positional_item` ("," `positional_item`)* positional_item: `assignment_expression` | "*" `expression` starred_and_keywords: ("*" `expression` | `keyword_item`) : ("," "*" `expression` | "," `keyword_item`)* From a88b49c3f2184428124890741778e92c0fdb6446 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 25 Jun 2025 23:02:53 +0800 Subject: [PATCH 681/989] gh-135927: Fix MSVC Clatest C builds (GH-135935) --- Include/pymacro.h | 4 ++-- Include/pyport.h | 3 ++- .../next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst | 1 + Modules/mathmodule.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst diff --git a/Include/pymacro.h b/Include/pymacro.h index bfe660e8303d0f..b2886ddac5d17e 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -73,14 +73,14 @@ # else # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # endif +# elif defined(_MSC_VER) +# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # elif (defined(__GNUC__) || defined(__clang__)) # define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T -# elif defined(_MSC_VER) -# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # else # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # endif diff --git a/Include/pyport.h b/Include/pyport.h index 3eac119bf8e8d8..73f071c41a6687 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,7 +49,8 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ +#if (defined(__GNUC__) || defined(__clang__)) && \ + (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ || (defined(__cplusplus) && __cplusplus >= 201103) # define _Py_NULL nullptr #else diff --git a/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst new file mode 100644 index 00000000000000..21a2c87d3448c1 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst @@ -0,0 +1 @@ +Fix building with MSVC when passing option ``/std:clatest``. diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index bbbb49115681de..1837de4735c176 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3148,7 +3148,7 @@ static PyObject * math_issubnormal_impl(PyObject *module, double x) /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/ { -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L return PyBool_FromLong(issubnormal(x)); #else return PyBool_FromLong(isfinite(x) && x && !isnormal(x)); From cbfaf41caf135b8598a560854cd59e992a2ccfed Mon Sep 17 00:00:00 2001 From: Joseph Tibbertsma Date: Wed, 25 Jun 2025 09:41:36 -0700 Subject: [PATCH 682/989] Fix needless spinning in `_PyMutex_LockTimed` with zero timeout (gh-135872) The free threading build could spin unnecessarily on `_Py_yield()` if the initial compare and swap failed. --- .../2025-06-23-18-08-32.gh-issue-135871.50C528.rst | 2 ++ Python/lock.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst new file mode 100644 index 00000000000000..ce29ddecefe17f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst @@ -0,0 +1,2 @@ +Non-blocking mutex lock attempts now return immediately when the lock is busy +instead of briefly spinning in the :term:`free threading` build. diff --git a/Python/lock.c b/Python/lock.c index b125ad0c9e356d..ea6ac00bfeccb4 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_ACQUIRED; } } - else if (timeout == 0) { + if (timeout == 0) { return PY_LOCK_FAILURE; } From 6227662ff3bf838d31e9441eda935d24733d705a Mon Sep 17 00:00:00 2001 From: Rob Reynolds Date: Wed, 25 Jun 2025 11:40:00 -0600 Subject: [PATCH 683/989] Docs: Fix indentation in `slice` class of `functions.rst` (GH-134393) Paragraph should not be under `slice.step`. It applies to the whole class. --------- Co-authored-by: Rob Reynolds <13379223+reynoldsnlp@users.noreply.github.com> --- Doc/library/functions.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 2ecce3dba5a0b9..80bd1275973f8d 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1839,15 +1839,15 @@ are always available. They are listed here in alphabetical order. ``range(start, stop, step)``. The *start* and *step* arguments default to ``None``. + Slice objects have read-only data attributes :attr:`!start`, + :attr:`!stop`, and :attr:`!step` which merely return the argument + values (or their default). They have no other explicit functionality; + however, they are used by NumPy and other third-party packages. + .. attribute:: slice.start .. attribute:: slice.stop .. attribute:: slice.step - Slice objects have read-only data attributes :attr:`!start`, - :attr:`!stop`, and :attr:`!step` which merely return the argument - values (or their default). They have no other explicit functionality; - however, they are used by NumPy and other third-party packages. - Slice objects are also generated when extended indexing syntax is used. For example: ``a[start:stop:step]`` or ``a[start:stop, i]``. See :func:`itertools.islice` for an alternate version that returns an From 0fadd9fd205e4df105676359876a6d6684f215a3 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 04:01:25 +0100 Subject: [PATCH 684/989] gh-125142: remove duplicated import in `Lib/pydoc.py` (gh-135215) --- Lib/pydoc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7528178fdcae97..d508fb70ea429e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1812,7 +1812,6 @@ def writedocs(dir, pkgpath='', done=None): def _introdoc(): - import textwrap ver = '%d.%d' % sys.version_info[:2] if os.environ.get('PYTHON_BASIC_REPL'): pyrepl_keys = '' @@ -2170,7 +2169,6 @@ def showtopic(self, topic, more_xrefs=''): if more_xrefs: xrefs = (xrefs or '') + ' ' + more_xrefs if xrefs: - import textwrap text = 'Related help topics: ' + ', '.join(xrefs.split()) + '\n' wrapped_text = textwrap.wrap(text, 72) doc += '\n%s\n' % '\n'.join(wrapped_text) From 1f5e23fd7015a8f7b14d0181ec83efa95c5d5b68 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Jun 2025 20:03:24 -0700 Subject: [PATCH 685/989] Add whatsnew text for warnings module changes. (gh-135869) --- Doc/whatsnew/3.14.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index cbca720b75e96c..a74d414ae4bb70 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -978,6 +978,23 @@ be specified by the build backend, as it will no longer be determined automatically by the C compiler. For a running interpreter, the setting that was used at compile time can be found using :func:`sysconfig.get_config_var`. +A new flag has been added, :data:`~sys.flags.context_aware_warnings`. This +flag defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then the :class:`warnings.catch_warnings` context +manager uses a context variable for warning filters. This makes the context +manager behave predicably when used with multiple threads or asynchronous +tasks. + +A new flag has been added, :data:`~sys.flags.thread_inherit_context`. This flag +defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then threads created with :class:`threading.Thread` +start with a copy of the :class:`~contextvars.Context()` of the caller of +:meth:`~threading.Thread.start`. Most significantly, this makes the warning +filtering context established by :class:`~warnings.catch_warnings` be +"inherited" by threads (or asyncio tasks) started within that context. It also +affects other modules that use context variables, such as the :mod:`decimal` +context manager. + .. _whatsnew314-pyrepl-highlighting: @@ -1028,6 +1045,18 @@ Please report any bugs or major performance regressions that you encounter! .. seealso:: :pep:`744` +Concurrent safe warnings control +-------------------------------- + +The :class:`warnings.catch_warnings` context manager will now optionally +use a context variable for warning filters. This is enabled by setting +the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X`` +command-line option or an environment variable. This gives predicable +warnings control when using :class:`~warnings.catch_warnings` combined with +multiple threads or asynchronous tasks. The flag defaults to true for the +free-threaded build and false for the GIL-enabled build. + +(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.) Other language changes ====================== From e3ea6f2b3b084700a34ce392f5cf897407469b3a Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Wed, 25 Jun 2025 23:44:08 -0400 Subject: [PATCH 686/989] gh-135956: Remove duplicate word in _pydatetime docstring (#135957) _pydatetime.isoformat docstring repeats 'giving'. --- Lib/_pydatetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 71f619024e570d..bc35823f70144e 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -2164,7 +2164,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. From a1da208eec3028b1ecae804d4c0dc6b43cdddae9 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 09:25:41 +0200 Subject: [PATCH 687/989] gh-131591: Add Py_ prefix to MAX_SCRIPT_PATH_SIZE; remove unprefixed struct tag (GH-135924) Names/macros defined in public headers should have `Py`/`_Py` prefixes. --- Include/cpython/pystate.h | 6 +++--- Include/internal/pycore_debug_offsets.h | 2 +- Modules/posixmodule.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 54d7e62292966e..be582122118e44 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 /* Remote debugger support */ -#define MAX_SCRIPT_PATH_SIZE 512 -typedef struct _remote_debugger_support { +#define Py_MAX_SCRIPT_PATH_SIZE 512 +typedef struct { int32_t debugger_pending_call; - char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; + char debugger_script_path[Py_MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; typedef struct _err_stackitem { diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index ce3fcb109f49f7..1b59fa2ef60014 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -368,7 +368,7 @@ typedef struct _Py_DebugOffsets { .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ - .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + .debugger_script_path_size = Py_MAX_SCRIPT_PATH_SIZE, \ }, \ } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7dc5ef39a566e4..b570f81b7cf7c2 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -685,7 +685,8 @@ static void reset_remotedebug_data(PyThreadState *tstate) { tstate->remote_debugger_support.debugger_pending_call = 0; - memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE); + memset(tstate->remote_debugger_support.debugger_script_path, 0, + Py_MAX_SCRIPT_PATH_SIZE); } From 10a3d431881bb9169abde97f85ea6a670e1ef3cc Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 26 Jun 2025 05:43:08 -0400 Subject: [PATCH 688/989] gh-135755: Move `PyFunction_GET_BUILTINS` to the private API (GH-135938) --- Include/cpython/funcobject.h | 5 ----- Include/internal/pycore_function.h | 5 +++++ Modules/_testinternalcapi.c | 5 +++-- Objects/funcobject.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 18249b95befe65..598cd330bc9ca9 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) -static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { - return _PyFunction_CAST(func)->func_builtins; -} -#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) - static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index a30d52d49bdc4d..6e1209659565a3 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -41,6 +41,11 @@ extern PyObject *_Py_set_function_type_params( PyAPI_FUNC(int) _PyFunction_VerifyStateless(PyThreadState *, PyObject *); +static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + #ifdef __cplusplus } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 804cb4e4d1c8ee..fdf22a0c994d3a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -21,6 +21,7 @@ #include "pycore_fileutils.h" // _Py_normpath() #include "pycore_flowgraph.h" // _PyCompile_OptimizeCfg() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_function.h" // _PyFunction_GET_BUILTINS #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_import.h" // _PyImport_ClearExtension() @@ -1022,7 +1023,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } @@ -1190,7 +1191,7 @@ verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index f87b0e5d8f1e47..f8dd10a346d613 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1256,7 +1256,7 @@ _PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) return -1; } // Check the builtins. - PyObject *builtinsns = PyFunction_GET_BUILTINS(func); + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); if (builtinsns != NULL && !PyDict_Check(builtinsns)) { _PyErr_Format(tstate, PyExc_TypeError, "unsupported builtins %R", builtinsns); From 9193efdeab4596f987528ba278afa2eca93a9e8a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 11:48:37 +0200 Subject: [PATCH 689/989] gh-125206: Make _Py_FFI_SUPPORT_C_COMPLEX private (GH-135932) --- Modules/_ctypes/_ctypes_test.c | 4 ++-- Modules/_ctypes/cfield.c | 4 ++-- Modules/_ctypes/ctypes.h | 2 +- configure | 2 +- configure.ac | 2 +- pyconfig.h.in | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index d28e5708b44933..66338805007853 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -23,7 +23,7 @@ # define _Py_thread_local __thread #endif -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # include // csqrt() # undef I // for _ctypes_test_generated.c.h #endif @@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a) return sqrt(a); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) EXPORT(double complex) my_csqrt(double complex a) { return csqrt(a); diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 163b92642615e5..547e2471a1cbc0 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size) return PyFloat_FromDouble(val); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) /* We don't use _Complex types here, using arrays instead, as the C11+ standard says: "Each complex type has the same representation and alignment @@ -1599,7 +1599,7 @@ for base_code, base_c_type in [ /////////////////////////////////////////////////////////////////////////// TABLE_ENTRY_SW(d, &ffi_type_double); -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) if (Py_FFI_COMPLEX_AVAILABLE) { TABLE_ENTRY(D, &ffi_type_complex_double); TABLE_ENTRY(F, &ffi_type_complex_float); diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 6a45c11e61af5c..5b4f97d43b8721 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -23,7 +23,7 @@ // Do we support C99 complex types in ffi? // For Apple's libffi, this must be determined at runtime (see gh-128156). -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # if USING_APPLE_OS_LIBFFI && defined(__has_builtin) # if __has_builtin(__builtin_available) # define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *) diff --git a/configure b/configure index 50223d81cd976e..43b36d9231e341 100755 --- a/configure +++ b/configure @@ -15742,7 +15742,7 @@ fi printf "%s\n" "$ac_cv_ffi_complex_double_supported" >&6; } if test "$ac_cv_ffi_complex_double_supported" = "yes"; then -printf "%s\n" "#define Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h +printf "%s\n" "#define _Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h fi diff --git a/configure.ac b/configure.ac index f58e16c774f4aa..e77696e3a4e025 100644 --- a/configure.ac +++ b/configure.ac @@ -4163,7 +4163,7 @@ int main(void) [ac_cv_ffi_complex_double_supported=no]) ])]) if test "$ac_cv_ffi_complex_double_supported" = "yes"; then - AC_DEFINE([Py_FFI_SUPPORT_C_COMPLEX], [1], + AC_DEFINE([_Py_FFI_SUPPORT_C_COMPLEX], [1], [Defined if _Complex C type can be used with libffi.]) fi diff --git a/pyconfig.h.in b/pyconfig.h.in index 65a2c55217c258..d4f1da7fb10776 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1736,9 +1736,6 @@ /* Defined if Python is built as a shared library. */ #undef Py_ENABLE_SHARED -/* Defined if _Complex C type can be used with libffi. */ -#undef Py_FFI_SUPPORT_C_COMPLEX - /* Define if you want to disable the GIL */ #undef Py_GIL_DISABLED @@ -2026,6 +2023,9 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Defined if _Complex C type can be used with libffi. */ +#undef _Py_FFI_SUPPORT_C_COMPLEX + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 6be17baeb5bcfc78f0b7fcfe5221df0744c865e8 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 13:05:01 +0200 Subject: [PATCH 690/989] gh-135755: Use private names (_Py*) for header file guards new in 3.14 (GH-135921) These are private API; let's name new ones accordingly. --- Include/audit.h | 10 +++++----- Include/cpython/audit.h | 2 +- Include/refcount.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Include/audit.h b/Include/audit.h index 793b7077e1027b..9be54ad4411096 100644 --- a/Include/audit.h +++ b/Include/audit.h @@ -1,5 +1,5 @@ -#ifndef Py_AUDIT_H -#define Py_AUDIT_H +#ifndef _Py_AUDIT_H +#define _Py_AUDIT_H #ifdef __cplusplus extern "C" { #endif @@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple( #ifndef Py_LIMITED_API -# define Py_CPYTHON_AUDIT_H +# define _Py_CPYTHON_AUDIT_H # include "cpython/audit.h" -# undef Py_CPYTHON_AUDIT_H +# undef _Py_CPYTHON_AUDIT_H #endif #ifdef __cplusplus } #endif -#endif /* !Py_AUDIT_H */ +#endif /* !_Py_AUDIT_H */ diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h index 3c5c7a8c06091d..536f9248632097 100644 --- a/Include/cpython/audit.h +++ b/Include/cpython/audit.h @@ -1,4 +1,4 @@ -#ifndef Py_CPYTHON_AUDIT_H +#ifndef _Py_CPYTHON_AUDIT_H # error "this header file must not be included directly" #endif diff --git a/Include/refcount.h b/Include/refcount.h index ebd1dba6d15e1a..034c453f449f5b 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -1,5 +1,5 @@ -#ifndef Py_REFCOUNT_H -#define Py_REFCOUNT_H +#ifndef _Py_REFCOUNT_H +#define _Py_REFCOUNT_H #ifdef __cplusplus extern "C" { #endif @@ -561,4 +561,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #ifdef __cplusplus } #endif -#endif // !Py_REFCOUNT_H +#endif // !_Py_REFCOUNT_H From ffb2a02f98d904505c8a82d8540c36dee4c67eed Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:41:41 +0800 Subject: [PATCH 691/989] gh-135965: Delete duplicate word in isolating-extensions howto (#135964) Change use use to use. --- Doc/howto/isolating-extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index b2109b1503992b..fbc426ba1d7d9a 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -453,7 +453,7 @@ Avoiding ``PyObject_New`` GC-tracked objects need to be allocated using GC-aware functions. -If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: From fb9e292919d82326acea456aa071c9af6aff5626 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 26 Jun 2025 08:02:50 -0500 Subject: [PATCH 692/989] gh-129958: New syntax error in format spec applies to both f-strings and t-strings (#135570) Co-authored-by: Tomas R. Co-authored-by: Lysandros Nikolaou --- Lib/test/test_tstring.py | 1 + .../2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst | 2 ++ Parser/lexer/lexer.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst diff --git a/Lib/test/test_tstring.py b/Lib/test/test_tstring.py index e72a1ea54176d5..aabae38556735b 100644 --- a/Lib/test/test_tstring.py +++ b/Lib/test/test_tstring.py @@ -219,6 +219,7 @@ def test_syntax_errors(self): ("t'{lambda:1}'", "t-string: lambda expressions are not allowed " "without parentheses"), ("t'{x:{;}}'", "t-string: expecting a valid expression after '{'"), + ("t'{1:d\n}'", "t-string: newlines are not allowed in format specifiers") ): with self.subTest(case), self.assertRaisesRegex(SyntaxError, err): eval(case) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst new file mode 100644 index 00000000000000..70b3e99425df14 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst @@ -0,0 +1,2 @@ +Differentiate between t-strings and f-strings in syntax error for newlines +in format specifiers of single-quoted interpolated strings. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..0a078dd594148c 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1421,7 +1421,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } From 0d76dccc3b4376ba075a1737f58809e3d83aaaa3 Mon Sep 17 00:00:00 2001 From: Connor Denihan <188690869+cdenihan@users.noreply.github.com> Date: Thu, 26 Jun 2025 09:27:25 -0400 Subject: [PATCH 693/989] gh-135110: Fix misleading `generator.close()` documentation (GH-135152) The documentation incorrectly stated that generator.close() 'raises' a GeneratorExit exception. This was misleading because the method doesn't raise the exception to the caller - it sends the exception internally to the generator and returns None. --- Doc/howto/functional.rst | 2 +- Doc/reference/expressions.rst | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index b4f3463afee812..78e56e0c64fb10 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -602,7 +602,7 @@ generators: raise an exception inside the generator; the exception is raised by the ``yield`` expression where the generator's execution is paused. -* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the +* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the generator to terminate the iteration. On receiving this exception, the generator's code must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the exception and doing anything else is diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 17f39aaf5f57cd..24544a055c3ed2 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -625,8 +625,10 @@ is already executing raises a :exc:`ValueError` exception. .. method:: generator.close() - Raises a :exc:`GeneratorExit` at the point where the generator function was - paused. If the generator function catches the exception and returns a + Raises a :exc:`GeneratorExit` exception at the point where the generator + function was paused (equivalent to calling ``throw(GeneratorExit)``). + The exception is raised by the yield expression where the generator was paused. + If the generator function catches the exception and returns a value, this value is returned from :meth:`close`. If the generator function is already closed, or raises :exc:`GeneratorExit` (by not catching the exception), :meth:`close` returns :const:`None`. If the generator yields a From a4625d597f9fc2d083fbb9c22d3ffcec73b2061a Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 27 Jun 2025 02:18:32 +1200 Subject: [PATCH 694/989] gh-91555: add warning to docs about possibility of deadlock/infinite recursion (GH-135954) * gh-91555: add warning to docs about possibility of deadlock/infinite recursion Attempt to clarify in the documentation that care must be taken when using multiprocessing classes to implement logging since they have builtin internal logging, and hence may cause deadlock/infinite recursion. * Update Doc/library/logging.handlers.rst Co-authored-by: Vinay Sajip * Change whitespace. --------- Co-authored-by: Vinay Sajip --- Doc/library/logging.handlers.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 8f3aa1dfdd0cde..d74ef73ee28497 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -1059,6 +1059,15 @@ possible, while any potentially slow operations (such as sending an email via .. note:: If you are using :mod:`multiprocessing`, you should avoid using :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`. + .. warning:: + + The :mod:`multiprocessing` module uses an internal logger created and + accessed via :meth:`~multiprocessing.get_logger`. + :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon + items being queued. If those log messages are processed by a + :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance, + it will cause a deadlock or infinite recursion. + .. method:: emit(record) Enqueues the result of preparing the LogRecord. Should an exception From 8594d2c03dccd9731e9fc83a9fe6a19a3a090cb7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 26 Jun 2025 18:11:49 +0200 Subject: [PATCH 695/989] gh-135927: Check _MSC_VER to define _Py_NULL macro (#135987) --- Include/pyport.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 73f071c41a6687..0675294d5bc3b1 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,9 +49,9 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined(__GNUC__) || defined(__clang__)) && \ - (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ - || (defined(__cplusplus) && __cplusplus >= 201103) +#if !defined(_MSC_VER) && \ + ((defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ + || (defined(__cplusplus) && __cplusplus >= 201103)) # define _Py_NULL nullptr #else # define _Py_NULL NULL From 642e5dfc74310d15bb81f8e94167590380a5fbfb Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:20:07 +0100 Subject: [PATCH 696/989] IDLE: Update NEWS2x.txt with 2.7.0 release date (#129908) --- Lib/idlelib/NEWS2x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/idlelib/NEWS2x.txt b/Lib/idlelib/NEWS2x.txt index 6751ca5f111b18..3721193007e59b 100644 --- a/Lib/idlelib/NEWS2x.txt +++ b/Lib/idlelib/NEWS2x.txt @@ -1,6 +1,6 @@ What's New in IDLE 2.7? (Merged into 3.1 before 2.7 release.) ======================= -*Release date: XX-XXX-2010* +*Release date: 03-Jul-2010* - idle.py modified and simplified to better support developing experimental versions of IDLE which are not installed in the standard location. From 58a42dea97f4fa0df38ef4a95a2ede65e0549f71 Mon Sep 17 00:00:00 2001 From: Nathan Korth Date: Thu, 26 Jun 2025 18:35:45 -0400 Subject: [PATCH 697/989] gh-135995: Fix missing char in palmos encoding (#135990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x8b correctly encodes to ‹, but 0x9b was mistakenly marked as a control character instead of ›. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brian Schubert Co-authored-by: Terry Jan Reedy --- Lib/encodings/palmos.py | 2 +- .../next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523496..df164ca5b9549c 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst new file mode 100644 index 00000000000000..998b3cd85b1d5d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst @@ -0,0 +1 @@ +In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE RIGHT-POINTING ANGLE QUOTATION MARK). From 34ce1920ca33c11ca2c379ed0ef30a91010bef4f Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Thu, 26 Jun 2025 20:00:19 -0400 Subject: [PATCH 698/989] Docs: Fix duplicate word typos (GH-135958) --- Doc/c-api/init.rst | 2 +- Doc/c-api/long.rst | 2 +- Doc/extending/newtypes_tutorial.rst | 2 +- Doc/library/ctypes.rst | 2 +- Doc/library/email.header.rst | 4 ++-- Doc/library/exceptions.rst | 2 +- Doc/library/faulthandler.rst | 2 +- Doc/library/mmap.rst | 2 +- Doc/library/pathlib.rst | 2 +- Doc/library/socketserver.rst | 2 +- Doc/library/threading.rst | 2 +- Doc/using/cmdline.rst | 4 ++-- Doc/whatsnew/3.13.rst | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 3106bf9808f254..41fd4ea14ef12f 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1250,7 +1250,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp) Reset all information in an interpreter state object. There must be - an :term:`attached thread state` for the the interpreter. + an :term:`attached thread state` for the interpreter. .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 25d9e62e387279..2d0bda76697e81 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -439,7 +439,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. All *n_bytes* of the buffer are written: large buffers are padded with zeroes. - If the returned value is greater than than *n_bytes*, the value was + If the returned value is greater than *n_bytes*, the value was truncated: as many of the lowest bits of the value as could fit are written, and the higher bits are ignored. This matches the typical behavior of a C-style downcast. diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index f14690de4f86e8..3bbee33bd50698 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -277,7 +277,7 @@ be an instance of a subclass. The explicit cast to ``CustomObject *`` above is needed because we defined ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc`` function pointer expects to receive a ``PyObject *`` argument. - By assigning to the the ``tp_dealloc`` slot of a type, we declare + By assigning to the ``tp_dealloc`` slot of a type, we declare that it can only be called with instances of our ``CustomObject`` class, so the cast to ``(CustomObject *)`` is safe. This is object-oriented polymorphism, in C! diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index e00fe9c8145f12..846cece3761858 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -2965,7 +2965,7 @@ fields, or any other data types containing pointer type fields. .. attribute:: is_anonymous True if this field is anonymous, that is, it contains nested sub-fields - that should be be merged into a containing structure or union. + that should be merged into a containing structure or union. .. _ctypes-arrays-pointers: diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index c3392a62b8ee79..f49885b8785235 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -206,7 +206,7 @@ The :mod:`email.header` module also provides the following convenient functions. .. note:: - This function exists for for backwards compatibility only. For + This function exists for backwards compatibility only. For new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. @@ -225,5 +225,5 @@ The :mod:`email.header` module also provides the following convenient functions. .. note:: - This function exists for for backwards compatibility only, and is + This function exists for backwards compatibility only, and is not recommended for use in new code. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index bb72032891ea98..9806ae80905ca0 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -1048,7 +1048,7 @@ their subgroups based on the types of the contained exceptions. subclasses that need a different constructor signature need to override that rather than :meth:`~object.__init__`. For example, the following defines an exception group subclass which accepts an exit_code and - and constructs the group's message from it. :: + constructs the group's message from it. :: class Errors(ExceptionGroup): def __new__(cls, errors, exit_code): diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 5058b85bffb15c..1977f4d3ba3916 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -90,7 +90,7 @@ An error will be printed instead of the stack. Additionally, some compilers do not support :term:`CPython's ` implementation of C stack dumps. As a result, a different error may be printed -instead of the stack, even if the the operating system supports dumping stacks. +instead of the stack, even if the operating system supports dumping stacks. .. note:: diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index 4e20c07331a220..8fca79b23e4e15 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -269,7 +269,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Resizing a map created with *access* of :const:`ACCESS_READ` or :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception. - Resizing a map created with with *trackfd* set to ``False``, + Resizing a map created with *trackfd* set to ``False``, will raise a :exc:`ValueError` exception. **On Windows**: Resizing the map will raise an :exc:`OSError` if there are other diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 86351e65dc4ed6..47986a2d9602ee 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1985,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking. If *follow_symlinks* is ``False``, return ``True`` only if the path is a file (without following symlinks); return ``False`` if the path - is a directory or other other non-file, or if it doesn't exist. + is a directory or other non-file, or if it doesn't exist. .. method:: is_symlink() diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 753f12460b824b..7fb629f7d2f256 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -543,7 +543,7 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -the first handler had to use a ``recv()`` loop to accumulate data until a +first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. TCP is stream based: data arrives in the order it was sent, but there no diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 52fefd590daf18..cabb41442f8419 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -621,7 +621,7 @@ since it is impossible to detect the termination of alien threads. an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. - If an attempt is made to join a running daemonic thread in in late stages + If an attempt is made to join a running daemonic thread in late stages of :term:`Python finalization ` :meth:`!join` raises a :exc:`PythonFinalizationError`. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a5867b489e0053..cad49e2deeb46f 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -653,7 +653,7 @@ Miscellaneous options .. versionadded:: 3.13 * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread` - to, by default, use a copy of context of of the caller of + to, by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, threads will start with an empty context. If unset, the value of this option defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also @@ -1284,7 +1284,7 @@ conflict. .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT If this variable is set to ``1`` then :class:`~threading.Thread` will, - by default, use a copy of context of of the caller of ``Thread.start()`` + by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, new threads will start with an empty context. If unset, this variable defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also :option:`-X thread_inherit_context<-X>`. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 580a3d8154dee1..ef7c36d8539dfd 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1996,7 +1996,7 @@ New Deprecations (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.) * Deprecate the :func:`typing.no_type_check_decorator` decorator function, - to be removed in in Python 3.15. + to be removed in Python 3.15. After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. (Contributed by Alex Waygood in :gh:`106309`.) From b38810bab76c11ea09260a817b3354aebc2af580 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:46:49 +0800 Subject: [PATCH 699/989] gh-135966: Modify iOS testbed to make app_packages a site directory (#135967) The iOS testbed now treats the app_packages folder as a site folder. This ensures it is on the path, but also ensures any .pth files are processed on app startup. --- Doc/using/ios.rst | 11 ++-- ...-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst | 1 + iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 51 +++++++++++++++---- 3 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 7d5c6331bef5ce..0fb28f8c866b02 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -298,9 +298,9 @@ To add Python to an iOS Xcode project: * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* (optional, but strongly recommended; this is enabled by default); - * ``PYTHONHOME`` for the interpreter is configured to point at the + * :envvar:`PYTHONHOME` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and - * The ``PYTHONPATH`` for the interpreter includes: + * The :envvar:`PYTHONPATH` for the interpreter includes: - the ``python/lib/python3.X`` subfolder of your app's bundle, - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and @@ -324,7 +324,12 @@ modules in your app, some additional steps will be required: the ``lib-dynload`` folder can be copied and adapted for this purpose. * If you're using a separate folder for third-party packages, ensure that folder - is included as part of the ``PYTHONPATH`` configuration in step 10. + is included as part of the :envvar:`PYTHONPATH` configuration in step 10. + +* If any of the folders that contain third-party packages will contain ``.pth`` + files, you should add that folder as a *site directory* (using + :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or + :attr:`sys.path` directly. Testing a Python package ------------------------ diff --git a/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst new file mode 100644 index 00000000000000..8dc007431f3919 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst @@ -0,0 +1 @@ +The iOS testbed now handles the ``app_packages`` folder as a site directory. diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index dd6e76f9496fe0..b502a6eb277b0b 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -15,6 +15,11 @@ - (void)testPython { PyStatus status; PyPreConfig preconfig; PyConfig config; + PyObject *app_packages_path; + PyObject *method_args; + PyObject *result; + PyObject *site_module; + PyObject *site_addsitedir_attr; PyObject *sys_module; PyObject *sys_path_attr; NSArray *test_args; @@ -111,29 +116,55 @@ - (void)testPython { return; } - sys_module = PyImport_ImportModule("sys"); - if (sys_module == NULL) { - XCTFail(@"Could not import sys module"); + // Add app_packages as a site directory. This both adds to sys.path, + // and ensures that any .pth files in that directory will be executed. + site_module = PyImport_ImportModule("site"); + if (site_module == NULL) { + XCTFail(@"Could not import site module"); return; } - sys_path_attr = PyObject_GetAttrString(sys_module, "path"); - if (sys_path_attr == NULL) { - XCTFail(@"Could not access sys.path"); + site_addsitedir_attr = PyObject_GetAttrString(site_module, "addsitedir"); + if (site_addsitedir_attr == NULL || !PyCallable_Check(site_addsitedir_attr)) { + XCTFail(@"Could not access site.addsitedir"); return; } - // Add the app packages path path = [NSString stringWithFormat:@"%@/app_packages", resourcePath, nil]; NSLog(@"App packages path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); - failed = PyList_Insert(sys_path_attr, 0, PyUnicode_FromString([path UTF8String])); - if (failed) { - XCTFail(@"Unable to add app packages to sys.path"); + app_packages_path = PyUnicode_FromWideChar(wtmp_str, wcslen(wtmp_str)); + if (app_packages_path == NULL) { + XCTFail(@"Could not convert app_packages path to unicode"); return; } PyMem_RawFree(wtmp_str); + method_args = Py_BuildValue("(O)", app_packages_path); + if (method_args == NULL) { + XCTFail(@"Could not create arguments for site.addsitedir"); + return; + } + + result = PyObject_CallObject(site_addsitedir_attr, method_args); + if (result == NULL) { + XCTFail(@"Could not add app_packages directory using site.addsitedir"); + return; + } + + // Add test code to sys.path + sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + XCTFail(@"Could not import sys module"); + return; + } + + sys_path_attr = PyObject_GetAttrString(sys_module, "path"); + if (sys_path_attr == NULL) { + XCTFail(@"Could not access sys.path"); + return; + } + path = [NSString stringWithFormat:@"%@/app", resourcePath, nil]; NSLog(@"App path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); From 2fc68e180ffdb31886938203e89a75b220a58cec Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 26 Jun 2025 23:48:05 -0500 Subject: [PATCH 700/989] gh-135551: Change how sorting picks minimum run length (#135553) New scheme from Stefan Pochmann for picking minimum run lengths. By allowing them to change a little from one run to the next, it's possible to arrange for that all merges, at all levels, strongly tend to be as evenly balanced as possible, for randomly ordered data. Meaning the number of initial runs is a power of 2, and all merges involve runs whose lengths differ by no more than 1. --- Misc/ACKS | 1 + ...-06-16-03-56-15.gh-issue-135551.hRTQO-.rst | 1 + Objects/listobject.c | 48 +++-- Objects/listsort.txt | 175 ++++++++++++++++-- 4 files changed, 184 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst diff --git a/Misc/ACKS b/Misc/ACKS index 74cf29cdbc552f..6ab50763feadd9 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1481,6 +1481,7 @@ Jean-François Piéronne Oleg Plakhotnyuk Anatoliy Platonov Marcel Plch +Stefan Pochmann Kirill Podoprigora Remi Pointel Jon Poler diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst new file mode 100644 index 00000000000000..22dda2a3e972a8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst @@ -0,0 +1 @@ +Sorting randomly ordered lists will often run a bit faster, thanks to a new scheme for picking minimum run lengths from Stefan Pochmann, which arranges for the merge tree to be as evenly balanced as is possible. diff --git a/Objects/listobject.c b/Objects/listobject.c index 23d3472b6d4153..1b36f4c25abf4d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1685,10 +1685,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) /* Avoid malloc for small temp arrays. */ #define MERGESTATE_TEMP_SIZE 256 -/* The largest value of minrun. This must be a power of 2, and >= 1, so that - * the compute_minrun() algorithm guarantees to return a result no larger than - * this, - */ +/* The largest value of minrun. This must be a power of 2, and >= 1 */ #define MAX_MINRUN 64 #if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1)) #error "MAX_MINRUN must be a power of 2, and >= 1" @@ -1749,6 +1746,11 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Varisbles used for minrun computation. The "ideal" minrun length is + * the infinite precision listlen / 2**e. See listsort.txt. + */ + Py_ssize_t mr_current, mr_e, mr_mask; }; /* binarysort is the best method for sorting small arrays: it does few @@ -2210,6 +2212,14 @@ merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc, ms->min_gallop = MIN_GALLOP; ms->listlen = list_size; ms->basekeys = lo->keys; + + /* State for generating minrun values. See listsort.txt. */ + ms->mr_e = 0; + while (list_size >> ms->mr_e >= MAX_MINRUN) { + ++ms->mr_e; + } + ms->mr_mask = (1 << ms->mr_e) - 1; + ms->mr_current = 0; } /* Free all the temp memory owned by the MergeState. This must be called @@ -2687,27 +2697,15 @@ merge_force_collapse(MergeState *ms) return 0; } -/* Compute a good value for the minimum run length; natural runs shorter - * than this are boosted artificially via binary insertion. - * - * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MAX_MINRUN / 2. - * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is - * close to, but strictly less than, an exact power of 2. - * - * See listsort.txt for more info. - */ -static Py_ssize_t -merge_compute_minrun(Py_ssize_t n) +/* Return the next minrun value to use. See listsort.txt. */ +Py_LOCAL_INLINE(Py_ssize_t) +minrun_next(MergeState *ms) { - Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ - - assert(n >= 0); - while (n >= MAX_MINRUN) { - r |= n & 1; - n >>= 1; - } - return n + r; + ms->mr_current += ms->listlen; + assert(ms->mr_current >= 0); /* no overflow */ + Py_ssize_t result = ms->mr_current >> ms->mr_e; + ms->mr_current &= ms->mr_mask; + return result; } /* Here we define custom comparison functions to optimize for the cases one commonly @@ -3075,7 +3073,6 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ - minrun = merge_compute_minrun(nremaining); do { Py_ssize_t n; @@ -3084,6 +3081,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < 0) goto fail; /* If short, extend to min(minrun, nremaining). */ + minrun = minrun_next(&ms); if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; diff --git a/Objects/listsort.txt b/Objects/listsort.txt index f387d9c116e502..5b2fc7d50a25ca 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -270,8 +270,8 @@ result. This has two primary good effects: Computing minrun ---------------- -If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the -whole array then; it's hard to beat that given the overheads of trying +If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the +whole array then; it's hard to beat that given the overheads of trying something fancier (see note BINSORT). When N is a power of 2, testing on random data showed that minrun values of @@ -288,7 +288,6 @@ that 32 isn't a good choice for the general case! Consider N=2112: >>> divmod(2112, 32) (66, 0) ->>> If the data is randomly ordered, we're very likely to end up with 66 runs each of length 32. The first 64 of these trigger a sequence of perfectly @@ -301,22 +300,94 @@ to get 64 elements into place). If we take minrun=33 in this case, then we're very likely to end up with 64 runs each of length 33, and then all merges are perfectly balanced. Better! -What we want to avoid is picking minrun such that in +The original code used a cheap heuristic to pick a minrun that avoided the +very worst cases of imbalance for the final merge, but "pretty bad" cases +still existed. - q, r = divmod(N, minrun) +In 2025, Stefan Pochmann found a much better approach, based on letting minrun +vary a bit from one run to the next. Under his scheme, at _all_ levels of the +merge tree: -q is a power of 2 and r>0 (then the last merge only gets r elements into -place, and r < minrun is small compared to N), or q a little larger than a -power of 2 regardless of r (then we've got a case similar to "2112", again -leaving too little work for the last merge to do). +- The number of runs is a power of 2. +- At most two different run lengths appear. +- When two do appear, the smaller is one less than the larger. +- The lengths of run pairs merged never differ by more than one. -Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that -N/minrun is exactly a power of 2, or if that isn't possible, is close to, but -strictly less than, a power of 2. This is easier to do than it may sound: -take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining -bits are set. In fact, that rule covers every case in this section, including -small N and exact powers of 2; merge_compute_minrun() is a deceptively simple -function. +So, in all respects, as perfectly balanced as possible. + +For the 2112 case, that also keeps minrun at 33, but we were lucky there +that 2112 is 33 times a power of 2. The new approach doesn't rely on luck. + +For example, with 315 random elements, the old scheme uses fixed minrun=40 and +produces runs of length 40, except for the last. The new scheme produces a +mix of lengths 39 and 40: + +old: 40 40 40 40 40 40 40 35 +new: 39 39 40 39 39 40 39 40 + +Both schemes produce eight runs, a power of 2. That's good for a balanced +merge tree. But the new scheme allows merges where left and right length +never differ by more than 1: + +39 39 40 39 39 40 39 40 + 78 79 79 79 + 157 158 + 315 + +(This shows merges downward, e.g., two runs of length 39 are merged and +become a run of length 78.) + +With larger lists, the old scheme can get even more unbalanced. For example, +with 32769 elements (that's 2**15 + 1), it uses minrun=33 and produces 993 +runs (of length 33). That's not even a power of 2. The new scheme instead +produces 1024 runs, all with length 32 except for the last one with length 33. + +How does it work? Ideally, all runs would be exactly equally long. For the +above example, each run would have 315/8 = 39.375 elements. Which of course +doesn't work. But we can get close: + +For the first run, we'd like 39.375 elements. Since that's impossible, we +instead use 39 (the floor) and remember the current leftover fraction 0.375. +For the second run, we add 0.375 + 39.375 = 39.75. Again impossible, so we +instead use 39 and remember 0.75. For the third run, we add 0.75 + 39.375 = +40.125. This time we get 40 and remember 0.125. And so on. Here's a Python +generator doing that: + +def gen_minruns_with_floats(n): + mr = n + while mr >= MAX_MINRUN: + mr /= 2 + + mr_current = 0 + while True: + mr_current += mr + yield int(mr_current) + mr_current %= 1 + +But while all arithmetic here can be done exactly using binery floating point, +floats have less precision that a Py_ssize_t, and mixing floats with ints is +needlessly expensive anyway. + +So here's an integer version, where the internal numbers are scaled up by +2**e, or rather not divided by 2**e. Instead, only each yielded minrun gets +divided (by right-shifting). For example instead of adding 39.375 and +reducing modulo 1, it just adds 315 and reduces modulo 8. And always divides +by 8 to get each actual minrun value: + +def gen_minruns_simpler(n): + e = 0 + while (n >> e) >= MAX_MINRUN: + e += 1 + mask = (1 << e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> e + mr_current &= mask + +See note MINRUN CODE for a full implementation and a driver that exhaustively +verifies the claims above for all list lengths through 2 million. The Merge Pattern @@ -820,3 +891,75 @@ partially mitigated by pre-scanning the data to determine whether the data is homogeneous with respect to type. If so, it is sometimes possible to substitute faster type-specific comparisons for the slower, generic PyObject_RichCompareBool. + +MINRUN CODE +from itertools import accumulate +try: + from itertools import batched +except ImportError: + from itertools import islice + def batched(xs, k): + it = iter(xs) + while chunk := tuple(islice(it, k)): + yield chunk + +MAX_MINRUN = 64 + +def gen_minruns(n): + # In listobject.c, initialization is done in merge_init(), and + # the body of the loop in minrun_next(). + mr_e = 0 + while (n >> mr_e) >= MAX_MINRUN: + mr_e += 1 + mr_mask = (1 << mr_e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> mr_e + mr_current &= mr_mask + +def chew(n, show=False): + if n < 1: + return + + sizes = [] + tot = 0 + for size in gen_minruns(n): + sizes.append(size) + tot += size + if tot >= n: + break + assert tot == n + print(n, len(sizes)) + + small, large = MAX_MINRUN // 2, MAX_MINRUN + while len(sizes) > 1: + assert not len(sizes) & 1 + assert len(sizes).bit_count() == 1 # i.e., power of 2 + assert sum(sizes) == n + assert min(sizes) >= min(n, small) + assert max(sizes) <= large + + d = set(sizes) + assert len(d) <= 2 + if len(d) == 2: + lo, hi = sorted(d) + assert lo + 1 == hi + + mr = n / len(sizes) + for i, s in enumerate(accumulate(sizes, initial=0)): + assert int(mr * i) == s + + newsizes = [] + for a, b in batched(sizes, 2): + assert abs(a - b) <= 1 + newsizes.append(a + b) + sizes = newsizes + smsll = large + large *= 2 + + assert sizes[0] == n + +for n in range(2_000_001): + chew(n) \ No newline at end of file From 0c6c09b7377e10dcf80844c961b578fbdc6f5375 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:58:20 +0800 Subject: [PATCH 701/989] gh-135968: Add iOS binary stubs for strip (#135970) Adds iOS binary stubs for invoking `strip` --- .../Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst | 1 + iOS/Resources/bin/arm64-apple-ios-simulator-strip | 2 ++ iOS/Resources/bin/arm64-apple-ios-strip | 2 ++ iOS/Resources/bin/x86_64-apple-ios-simulator-strip | 2 ++ 4 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst create mode 100755 iOS/Resources/bin/arm64-apple-ios-simulator-strip create mode 100755 iOS/Resources/bin/arm64-apple-ios-strip create mode 100755 iOS/Resources/bin/x86_64-apple-ios-simulator-strip diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst new file mode 100644 index 00000000000000..1c0b3825c71c1d --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst @@ -0,0 +1 @@ +Stubs for ``strip`` are now provided as part of an iOS install. diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/iOS/Resources/bin/arm64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..fd59d309b73a20 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-strip b/iOS/Resources/bin/arm64-apple-ios-strip new file mode 100755 index 00000000000000..75e823a3d02d61 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..c5cfb28929195a --- /dev/null +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@" From 07183ebce36462aaaea4d20e0502b20821dd2682 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 10:50:59 +0300 Subject: [PATCH 702/989] gh-53203: Fix strptime() for %c, %x and %X formats on some locales (#135971) * Add detection of decimal non-ASCII alt digits. * Add support of non-decimal alt digits on locale lzh_TW. * Accept only numbers in correct range if alt digits are known. * Fix bug in detecting the position of the week day name on locales byn_ER and wal_ET. * Fix support of single-digit hour on locales ar_SA and bg_BG. * Add support for %T, %R, %r, %C, %OC. * Prepare code to use nl_langinfo(). --- Lib/_strptime.py | 190 ++++++++++++++---- Lib/test/test_strptime.py | 28 ++- ...5-06-26-11-52-40.gh-issue-53203.TMigBr.rst | 2 + 3 files changed, 164 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index ae67949626d460..7ac6f36360cb69 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -41,6 +42,21 @@ def _findall(haystack, needle): yield i i += len(needle) + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -84,6 +100,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -119,9 +136,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -129,26 +180,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -172,9 +229,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. - current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -312,16 +373,49 @@ def __init__(self, locale_time=None): for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -337,8 +431,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -365,7 +460,7 @@ def repl(m): nonlocal day_of_month_in_format day_of_month_in_format = True return self[format_char] - format = re_sub(r'%([OE]?\\?.?)', repl, format) + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -467,6 +562,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -474,30 +578,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -513,9 +621,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 268230f6da78f8..e52c46f8c58cce 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -221,14 +221,16 @@ def test_ValueError(self): self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d", format="%A") for bad_format in ("%", "% ", "%\n"): - with self.assertRaisesRegex(ValueError, "stray % in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, "stray % in format ")): _strptime._strptime_time("2005", bad_format) - for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ", - "%.", "%+", "%_", "%~", "%\\", + for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ", + "%.", "%+", "%~", "%\\", "%O.", "%O+", "%O_", "%O~", "%O\\"): directive = bad_format[1:].rstrip() - with self.assertRaisesRegex(ValueError, - f"'{re.escape(directive)}' is a bad directive in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, + f"'{re.escape(directive)}' is a bad directive in format ")): _strptime._strptime_time("2005", bad_format) msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \ @@ -480,13 +482,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # - # BUG: Generates regexp that does not match the current date and time - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', - 'my_MM', 'or_IN', 'shn_MM', 'az_IR') + 'my_MM', 'or_IN', 'shn_MM', 'az_IR', + 'byn_ER', 'wal_ET', 'lzh_TW') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -525,11 +525,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # BUG: Generates regexp that does not match the current date - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', - 'az_IR', 'my_MM', 'or_IN', 'shn_MM') + 'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW') def test_date_locale(self): # Test %x directive now = time.time() @@ -546,7 +544,7 @@ def test_date_locale(self): # NB: Dates before 1969 do not roundtrip on many locales, including C. @unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390") @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW') def test_date_locale2(self): # Test %x directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -562,11 +560,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', - 'ti_ET', 'tig_ER', 'wal_ET') + 'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW', + 'ar_SA', 'bg_BG') def test_time_locale(self): # Test %X directive loc = locale.getlocale(locale.LC_TIME)[0] diff --git a/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst new file mode 100644 index 00000000000000..ba2fae49fdc933 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER, +wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW. From e23518fa96583d0190d457adb807b19545df26cf Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 27 Jun 2025 11:01:51 +0200 Subject: [PATCH 703/989] gh-136017: avoid decref in rich compare for bool objects (#136018) --- Objects/object.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 4d60128b092c22..1223983753ac46 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1131,11 +1131,14 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) res = PyObject_RichCompare(v, w, op); if (res == NULL) return -1; - if (PyBool_Check(res)) + if (PyBool_Check(res)) { ok = (res == Py_True); - else + assert(_Py_IsImmortal(res)); + } + else { ok = PyObject_IsTrue(res); - Py_DECREF(res); + Py_DECREF(res); + } return ok; } From f3aec60d7a01c5f085a3ef2d6670d46b42b8ddd3 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 27 Jun 2025 13:00:25 +0300 Subject: [PATCH 704/989] gh-128051: Fix tests if sys.float_repr_style is 'legacy' (#135908) Co-authored-by: Victor Stinner --- Lib/difflib.py | 4 ++-- Lib/test/test_builtin.py | 3 ++- Lib/test/test_configparser.py | 12 ++++++------ Lib/test/test_ctypes/test_parameters.py | 4 +++- Lib/test/test_enum.py | 2 +- Lib/test/test_float.py | 2 ++ Lib/test/test_format.py | 10 +++++----- Lib/test/test_fstring.py | 8 ++++---- Lib/test/test_json/test_tool.py | 2 +- Lib/test/test_optparse.py | 4 ++-- Lib/test/test_peepholer.py | 6 +++--- Lib/test/test_pprint.py | 2 +- Lib/test/test_reprlib.py | 20 ++++++++++---------- Lib/test/test_statistics.py | 3 ++- Lib/test/test_str.py | 8 ++++---- Lib/test/test_types.py | 4 ++-- 16 files changed, 50 insertions(+), 44 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 18801a9b19eb9d..487936dbf47cdc 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d221aa5e1d999f..14fe3355239615 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2991,7 +2991,8 @@ def test_namespace_order(self): def load_tests(loader, tests, pattern): from doctest import DocTestSuite - tests.addTest(DocTestSuite(builtins)) + if sys.float_repr_style == 'short': + tests.addTest(DocTestSuite(builtins)) return tests if __name__ == "__main__": diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py index 23904d17d326d8..e7364e18742c16 100644 --- a/Lib/test/test_configparser.py +++ b/Lib/test/test_configparser.py @@ -986,12 +986,12 @@ def test_add_section_default(self): def test_defaults_keyword(self): """bpo-23835 fix for ConfigParser""" - cf = self.newconfig(defaults={1: 2.4}) - self.assertEqual(cf[self.default_section]['1'], '2.4') - self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.4) - cf = self.newconfig(defaults={"A": 5.2}) - self.assertEqual(cf[self.default_section]['a'], '5.2') - self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.2) + cf = self.newconfig(defaults={1: 2.5}) + self.assertEqual(cf[self.default_section]['1'], '2.5') + self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.5) + cf = self.newconfig(defaults={"A": 5.25}) + self.assertEqual(cf[self.default_section]['a'], '5.25') + self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.25) class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_parameters.py b/Lib/test/test_ctypes/test_parameters.py index f89521cf8b3a67..46f8ff93efa915 100644 --- a/Lib/test/test_ctypes/test_parameters.py +++ b/Lib/test/test_ctypes/test_parameters.py @@ -1,3 +1,4 @@ +import sys import unittest import test.support from ctypes import (CDLL, PyDLL, ArgumentError, @@ -240,7 +241,8 @@ def test_parameter_repr(self): self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^$") self.assertEqual(repr(c_float.from_param(1.5)), "") self.assertEqual(repr(c_double.from_param(1.5)), "") - self.assertEqual(repr(c_double.from_param(1e300)), "") + if sys.float_repr_style == 'short': + self.assertEqual(repr(c_double.from_param(1e300)), "") self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^$") self.assertRegex(repr(c_char_p.from_param(b'hihi')), r"^$") self.assertRegex(repr(c_wchar_p.from_param('hihi')), r"^$") diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 221f9db7763764..bbc7630fa83f45 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -36,7 +36,7 @@ def load_tests(loader, tests, ignore): optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE, )) howto_tests = os.path.join(REPO_ROOT, 'Doc/howto/enum.rst') - if os.path.exists(howto_tests): + if os.path.exists(howto_tests) and sys.float_repr_style == 'short': tests.addTests(doctest.DocFileSuite( howto_tests, module_relative=False, diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 237d7b5d35edd7..00518abcb11b46 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -795,6 +795,8 @@ def test_format(self): self.assertRaises(ValueError, format, x, '.6,n') @support.requires_IEEE_754 + @unittest.skipUnless(sys.float_repr_style == 'short', + "applies only when using short float repr style") def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: for line in testfile: diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c7cc32e09490b2..1f626d87fa6c7a 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -346,12 +346,12 @@ def __bytes__(self): testcommon(b"%s", memoryview(b"abc"), b"abc") # %a will give the equivalent of # repr(some_obj).encode('ascii', 'backslashreplace') - testcommon(b"%a", 3.14, b"3.14") + testcommon(b"%a", 3.25, b"3.25") testcommon(b"%a", b"ghi", b"b'ghi'") testcommon(b"%a", "jkl", b"'jkl'") testcommon(b"%a", "\u0544", b"'\\u0544'") # %r is an alias for %a - testcommon(b"%r", 3.14, b"3.14") + testcommon(b"%r", 3.25, b"3.25") testcommon(b"%r", b"ghi", b"b'ghi'") testcommon(b"%r", "jkl", b"'jkl'") testcommon(b"%r", "\u0544", b"'\\u0544'") @@ -407,19 +407,19 @@ def test_non_ascii(self): self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007") self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007") - self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007") + self.assertEqual(format(12.5, "\u2007<6"), "12.5\u2007\u2007") self.assertEqual(format(0j, "\u2007<4"), "0j\u2007\u2007") self.assertEqual(format(1+2j, "\u2007<8"), "(1+2j)\u2007\u2007") self.assertEqual(format("abc", "\u2007>5"), "\u2007\u2007abc") self.assertEqual(format(123, "\u2007>5"), "\u2007\u2007123") - self.assertEqual(format(12.3, "\u2007>6"), "\u2007\u200712.3") + self.assertEqual(format(12.5, "\u2007>6"), "\u2007\u200712.5") self.assertEqual(format(1+2j, "\u2007>8"), "\u2007\u2007(1+2j)") self.assertEqual(format(0j, "\u2007>4"), "\u2007\u20070j") self.assertEqual(format("abc", "\u2007^5"), "\u2007abc\u2007") self.assertEqual(format(123, "\u2007^5"), "\u2007123\u2007") - self.assertEqual(format(12.3, "\u2007^6"), "\u200712.3\u2007") + self.assertEqual(format(12.5, "\u2007^6"), "\u200712.5\u2007") self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007") diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index f54557056784f2..58a30c8e6ac447 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1336,9 +1336,9 @@ def test_equal_equal(self): def test_conversions(self): self.assertEqual(f'{3.14:10.10}', ' 3.14') - self.assertEqual(f'{3.14!s:10.10}', '3.14 ') - self.assertEqual(f'{3.14!r:10.10}', '3.14 ') - self.assertEqual(f'{3.14!a:10.10}', '3.14 ') + self.assertEqual(f'{1.25!s:10.10}', '1.25 ') + self.assertEqual(f'{1.25!r:10.10}', '1.25 ') + self.assertEqual(f'{1.25!a:10.10}', '1.25 ') self.assertEqual(f'{"a"}', 'a') self.assertEqual(f'{"a"!r}', "'a'") @@ -1347,7 +1347,7 @@ def test_conversions(self): # Conversions can have trailing whitespace after them since it # does not provide any significance self.assertEqual(f"{3!s }", "3") - self.assertEqual(f'{3.14!s :10.10}', '3.14 ') + self.assertEqual(f'{1.25!s :10.10}', '1.25 ') # Not a conversion. self.assertEqual(f'{"a!r"}', "a!r") diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index 9ea2679c77ec17..30f9bb3331605c 100644 --- a/Lib/test/test_json/test_tool.py +++ b/Lib/test/test_json/test_tool.py @@ -270,7 +270,7 @@ def test_colors(self): (r'" \"foo\" "', f'{t.string}" \\"foo\\" "{t.reset}'), ('"α"', f'{t.string}"\\u03b1"{t.reset}'), ('123', f'{t.number}123{t.reset}'), - ('-1.2345e+23', f'{t.number}-1.2345e+23{t.reset}'), + ('-1.25e+23', f'{t.number}-1.25e+23{t.reset}'), (r'{"\\": ""}', f'''\ {ob} diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py index e6ffd2b0ffeb0e..e476e4727803e5 100644 --- a/Lib/test/test_optparse.py +++ b/Lib/test/test_optparse.py @@ -615,9 +615,9 @@ def test_float_default(self): self.parser.add_option( "-p", "--prob", help="blow up with probability PROB [default: %default]") - self.parser.set_defaults(prob=0.43) + self.parser.set_defaults(prob=0.25) expected_help = self.help_prefix + \ - " -p PROB, --prob=PROB blow up with probability PROB [default: 0.43]\n" + " -p PROB, --prob=PROB blow up with probability PROB [default: 0.25]\n" self.assertHelp(self.parser, expected_help) def test_alt_expand(self): diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index ef596630b930f7..3d7300e1480256 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -718,9 +718,9 @@ def format(fmt, *values): self.assertEqual(format('x = %d!', 1234), 'x = 1234!') self.assertEqual(format('x = %x!', 1234), 'x = 4d2!') self.assertEqual(format('x = %f!', 1234), 'x = 1234.000000!') - self.assertEqual(format('x = %s!', 1234.5678901), 'x = 1234.5678901!') - self.assertEqual(format('x = %f!', 1234.5678901), 'x = 1234.567890!') - self.assertEqual(format('x = %d!', 1234.5678901), 'x = 1234!') + self.assertEqual(format('x = %s!', 1234.0000625), 'x = 1234.0000625!') + self.assertEqual(format('x = %f!', 1234.0000625), 'x = 1234.000063!') + self.assertEqual(format('x = %d!', 1234.0000625), 'x = 1234!') self.assertEqual(format('x = %s%% %%%%', 1234), 'x = 1234% %%') self.assertEqual(format('x = %s!', '%% %s'), 'x = %% %s!') self.assertEqual(format('x = %s, y = %d', 12, 34), 'x = 12, y = 34') diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index 0c84d3d3bfd17a..41c337ade7eca1 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -458,7 +458,7 @@ def __new__(cls, celsius_degrees): return super().__new__(Temperature, celsius_degrees) def __repr__(self): kelvin_degrees = self + 273.15 - return f"{kelvin_degrees}°K" + return f"{kelvin_degrees:.2f}°K" self.assertEqual(pprint.pformat(Temperature(1000)), '1273.15°K') def test_sorted_dict(self): diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index d5631efcdb75b7..22a55b57c076eb 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -397,20 +397,20 @@ def test_valid_indent(self): 'object': { 1: 'two', b'three': [ - (4.5, 6.7), + (4.5, 6.25), [set((8, 9)), frozenset((10, 11))], ], }, 'tests': ( (dict(indent=None), '''\ - {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''), + {1: 'two', b'three': [(4.5, 6.25), [{8, 9}, frozenset({10, 11})]]}'''), (dict(indent=False), '''\ { 1: 'two', b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -430,7 +430,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -450,7 +450,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -470,7 +470,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -490,7 +490,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -518,7 +518,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -538,7 +538,7 @@ def test_valid_indent(self): -->b'three': [ -->-->( -->-->-->4.5, - -->-->-->6.7, + -->-->-->6.25, -->-->), -->-->[ -->-->-->{ @@ -558,7 +558,7 @@ def test_valid_indent(self): ....b'three': [ ........( ............4.5, - ............6.7, + ............6.25, ........), ........[ ............{ diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 0dd619dd7c8ceb..8250b0aef09aec 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -3319,7 +3319,8 @@ def tearDown(self): def load_tests(loader, tests, ignore): """Used for doctest/unittest integration.""" tests.addTests(doctest.DocTestSuite()) - tests.addTests(doctest.DocTestSuite(statistics)) + if sys.float_repr_style == 'short': + tests.addTests(doctest.DocTestSuite(statistics)) return tests diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index d6a7bd0da59910..2584fbf72d3fa6 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1231,10 +1231,10 @@ def __repr__(self): self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00') self.assertEqual('{0:<6}'.format(3), '3 ') - self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00') - self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01') - self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00') - self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ') + self.assertEqual('{0:\x00<6}'.format(3.25), '3.25\x00\x00') + self.assertEqual('{0:\x01<6}'.format(3.25), '3.25\x01\x01') + self.assertEqual('{0:\x00^6}'.format(3.25), '\x003.25\x00') + self.assertEqual('{0:^6}'.format(3.25), ' 3.25 ') self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00') self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01') diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index a117413301bebe..02592ea5eb21a1 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -517,8 +517,8 @@ def test(f, format_spec, result): # and a number after the decimal. This is tricky, because # a totally empty format specifier means something else. # So, just use a sign flag - test(1e200, '+g', '+1e+200') - test(1e200, '+', '+1e+200') + test(1.25e200, '+g', '+1.25e+200') + test(1.25e200, '+', '+1.25e+200') test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+', '+1.1e+200') From c45f4f3ebe34529a8db3a7918e8dd2e9f7ce8e86 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 14:35:55 +0300 Subject: [PATCH 705/989] gh-78465: Fix error message for cls.__new__(cls, ...) where cls is not instantiable (GH-135981) Previous error message suggested to use cls.__new__(), which obviously does not work. Now the error message is the same as for cls(...). --- Lib/test/support/__init__.py | 1 + Lib/test/test_sys.py | 7 +------ Lib/test/test_types.py | 5 ++--- .../2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst | 2 ++ Objects/typeobject.c | 5 +++++ 5 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 51c0ce11e8269d..fd39d3f7c95368 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2333,6 +2333,7 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds): qualname = f"{name}" msg = f"cannot create '{re.escape(qualname)}' instances" testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds) + testcase.assertRaisesRegex(TypeError, msg, tp.__new__, tp, *args, **kwds) def get_recursion_depth(): """Get the recursion depth of the caller function. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 27524d86355b9c..486bf10a0b5647 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -869,12 +869,7 @@ def test_sys_flags(self): def assert_raise_on_new_sys_type(self, sys_attr): # Users are intentionally prevented from creating new instances of # sys.flags, sys.version_info, and sys.getwindowsversion. - arg = sys_attr - attr_type = type(sys_attr) - with self.assertRaises(TypeError): - attr_type(arg) - with self.assertRaises(TypeError): - attr_type.__new__(attr_type, arg) + support.check_disallow_instantiation(self, type(sys_attr), sys_attr) def test_sys_flags_no_instantiation(self): self.assert_raise_on_new_sys_type(sys.flags) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 02592ea5eb21a1..fc26e71ffcb67b 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2,7 +2,7 @@ from test.support import ( run_with_locale, cpython_only, no_rerun, - MISSING_C_DOCSTRINGS, EqualToForwardRef, + MISSING_C_DOCSTRINGS, EqualToForwardRef, check_disallow_instantiation, ) from test.support.script_helper import assert_python_ok from test.support.import_helper import import_fresh_module @@ -1148,8 +1148,7 @@ def test_or_type_operator_reference_cycle(self): msg='Check for union reference leak.') def test_instantiation(self): - with self.assertRaises(TypeError): - types.UnionType() + check_disallow_instantiation(self, types.UnionType) self.assertIs(int, types.UnionType[int]) self.assertIs(int, types.UnionType[int, int]) self.assertEqual(int | str, types.UnionType[int, str]) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst new file mode 100644 index 00000000000000..99734d63c5d87e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst @@ -0,0 +1,2 @@ +Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is not +instantiable builtin or extension type (with ``tp_new`` set to ``NULL``). diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b9d549610693c1..6e7471cb5941a7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10020,6 +10020,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, From 695ab61351c019a7fcd731eebd77c172c90bf9e0 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Jun 2025 19:37:44 +0800 Subject: [PATCH 706/989] gh-132732: Automatically constant evaluate pure operations (GH-132733) This adds a "macro" to the optimizer DSL called "REPLACE_OPCODE_IF_EVALUATES_PURE", which allows automatically constant evaluating a bytecode body if certain inputs have no side effects upon evaluations (such as ints, strings, and floats). Co-authored-by: Tomas R. --- Include/internal/pycore_optimizer.h | 5 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_generated_cases.py | 197 ++++++++++++ ...-04-19-16-22-47.gh-issue-132732.jgqhlF.rst | 1 + Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 7 + Python/optimizer_bytecodes.c | 77 +---- Python/optimizer_cases.c.h | 304 ++++++++++++++---- Python/optimizer_symbols.c | 39 +++ Tools/cases_generator/optimizer_generator.py | 194 ++++++++++- 10 files changed, 706 insertions(+), 122 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 576c27947824b4..8b7f12bf03d624 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" -#include "pycore_stackref.h" +#include "pycore_stackref.h" // _PyStackRef #include @@ -316,6 +316,9 @@ extern JitOptRef _Py_uop_sym_new_type( JitOptContext *ctx, PyTypeObject *typ); extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); +extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); +_PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); extern bool _Py_uop_sym_has_type(JitOptRef sym); extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 52cbc2fffe484e..ff7e800aa9bb1a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -106,7 +106,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 9e0fd1218f2534..eb01328b6ea946 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2224,5 +2224,202 @@ def test_validate_uop_unused_size_mismatch(self): "Inputs must have equal sizes"): self.run_cases_test(input, input2, output) + def test_pure_uop_body_copied_in(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + res = PyStackRef_IsNone(foo); + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + res_stackref = PyStackRef_IsNone(foo); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_deopt(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + DEOPT_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + ctx->done = true; + break; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_error_if(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + ERROR_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + goto error; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + + def test_replace_opcode_uop_body_copied_in_complex(self): + input = """ + pure op(OP, (foo -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (foo) { + res_stackref = PyStackRef_IsNone(foo); + } + else { + res_stackref = 1; + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_replace_opocode_uop_reject_array_effects(self): + input = """ + pure op(OP, (foo[2] -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo[2] -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_unknown(ctx); + } + """ + output = """ + """ + with self.assertRaisesRegex(SyntaxError, + "Pure evaluation cannot take array-like inputs"): + self.run_cases_test(input, input2, output) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst new file mode 100644 index 00000000000000..aadaf2169fd01a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst @@ -0,0 +1 @@ +Automatically constant evaluate bytecode operations marked as pure in the JIT optimizer. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 535e552e047475..1a5a9ff13a23a5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -850,7 +850,7 @@ dummy_func( DEOPT_IF(!res); } - pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 145a8c118d3612..fab6fef5ccda10 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -26,6 +26,8 @@ #include "pycore_function.h" #include "pycore_uop_ids.h" #include "pycore_range.h" +#include "pycore_unicodeobject.h" +#include "pycore_ceval.h" #include #include @@ -321,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const +#define sym_new_const_steal _Py_uop_sym_new_const_steal +#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type @@ -350,6 +355,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define JUMP_TO_LABEL(label) goto label; + static int optimize_to_bool( _PyUOpInstruction *this_instr, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8a0484bdc2b04..3182e8b3b70144 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -181,6 +181,7 @@ dummy_func(void) { } op(_BINARY_OP, (lhs, rhs -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs); bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -235,35 +236,23 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -271,23 +260,8 @@ dummy_func(void) { } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -295,23 +269,8 @@ dummy_func(void) { } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -319,19 +278,8 @@ dummy_func(void) { } op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyUnicode_Type); } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { @@ -443,6 +391,7 @@ dummy_func(void) { } op(_UNARY_NOT, (value -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 10767ccdbd57f5..8d30df3aa7d429 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -206,6 +206,21 @@ JitOptRef value; JitOptRef res; value = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + assert(PyStackRef_BoolCheck(value)); + res_stackref = PyStackRef_IsFalse(value) + ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); stack_pointer[-1] = res; @@ -391,7 +406,41 @@ } case _BINARY_OP_MULTIPLY_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -400,7 +449,41 @@ } case _BINARY_OP_ADD_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -409,7 +492,41 @@ } case _BINARY_OP_SUBTRACT_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -443,29 +560,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -475,29 +605,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -507,29 +650,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -566,24 +722,39 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + PyObject *res_o = PyUnicode_Concat(left_o, right_o); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + if (res_o == NULL) { goto error; } - res = sym_new_const(ctx, temp); + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyUnicode_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -2539,6 +2710,31 @@ JitOptRef res; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, lhs) && + sym_is_safe_const(ctx, rhs) + ) { + JitOptRef lhs_sym = lhs; + JitOptRef rhs_sym = rhs; + _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); + _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); + PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); + assert(_PyEval_BinaryOps[oparg]); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + break; + } bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index c3d9e0e778bf55..e4dbca8362f4ce 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -185,6 +185,35 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref) return NULL; } +_PyStackRef +_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectBorrow(const_val); +} + +/* + Indicates whether the constant is safe to constant evaluate + (without side effects). + */ +bool +_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return false; + } + PyTypeObject *typ = Py_TYPE(const_val); + return (typ == &PyLong_Type) || + (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &PyTuple_Type) || + (typ == &PyBool_Type); +} + void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { @@ -467,6 +496,16 @@ _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val) return ref; } +JitOptRef +_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + JitOptRef res = _Py_uop_sym_new_const(ctx, const_val); + // Decref once because sym_new_const increfs it. + Py_DECREF(const_val); + return res; +} + JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx) { diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 3b4fe64b02a807..4556b6d5a74f37 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -12,6 +12,8 @@ analyze_files, StackItem, analysis_error, + CodeSection, + Label, ) from generators_common import ( DEFAULT_INPUT, @@ -19,6 +21,7 @@ write_header, Emitter, TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO @@ -75,6 +78,9 @@ def type_name(var: StackItem) -> str: return "JitOptRef *" return "JitOptRef " +def stackref_type_name(var: StackItem) -> str: + assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef." + return "_PyStackRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: variables = {"unused"} @@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None: class OptimizerEmitter(Emitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels) + self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure + self.original_uop = original_uop + self.stack = stack + def emit_save(self, storage: Storage) -> None: storage.flush(self.out) @@ -145,6 +157,185 @@ def goto_label(self, goto: Token, label: Token, storage: Storage) -> None: self.out.emit(goto) self.out.emit(label) + def replace_opcode_if_evaluates_pure( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + assert isinstance(uop, Uop) + input_identifiers = [] + for token in tkn_iter: + if token.kind == "IDENTIFIER": + input_identifiers.append(token) + if token.kind == "SEMI": + break + + if len(input_identifiers) == 0: + raise analysis_error( + "To evaluate an operation as pure, it must have at least 1 input", + tkn + ) + # Check that the input identifiers belong to the uop's + # input stack effect + uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs} + for input_tkn in input_identifiers: + if input_tkn.text not in uop_stack_effect_input_identifers: + raise analysis_error(f"{input_tkn.text} referenced in " + f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not " + f"exist in the base uop's input stack effects", + input_tkn) + input_identifiers_as_str = {tkn.text for tkn in input_identifiers} + used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] + assert len(used_stack_inputs) > 0 + emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) + emitter.emit("if (\n") + for inp in used_stack_inputs[:-1]: + emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n") + emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n") + emitter.emit(') {\n') + # Declare variables, before they are shadowed. + for inp in used_stack_inputs: + if inp.used: + emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n") + # Shadow the symbolic variables with stackrefs. + for inp in used_stack_inputs: + if inp.is_array(): + raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn) + if inp.used: + emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n") + # Rename all output variables to stackref variant. + for outp in self.original_uop.stack.outputs: + if outp.is_array(): + raise analysis_error( + "Array output StackRefs not supported for evaluating pure ops.", + self.original_uop.body.open + ) + emitter.emit(f"_PyStackRef {outp.name}_stackref;\n") + + + storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False) + # No reference management of outputs needed. + for var in storage.outputs: + var.in_local = True + emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n") + emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False) + self.out.start_line() + emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n") + # Finally, assign back the output stackrefs to symbolics. + for outp in self.original_uop.stack.outputs: + # All new stackrefs are created from new references. + # That's how the stackref contract works. + if not outp.peek: + emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") + else: + emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + storage.flush(self.out) + emitter.emit("break;\n") + emitter.emit("}\n") + return True + +class OptimizerConstantEmitter(OptimizerEmitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels, original_uop, stack) + # Replace all outputs to point to their stackref versions. + overrides = { + outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs + } + self._replacers = {**self._replacers, **overrides} + + def emit_to_with_replacement( + self, + out: CWriter, + tkn_iter: TokenIterator, + end: str, + uop: CodeSection, + storage: Storage, + inst: Instruction | None + ) -> Token: + parens = 0 + for tkn in tkn_iter: + if tkn.kind == end and parens == 0: + return tkn + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst) + else: + out.emit(tkn) + raise analysis_error(f"Expecting {end}. Reached end of file", tkn) + + def emit_stackref_override( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.emit(tkn) + self.out.emit("_stackref ") + return True + + def deopt_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.start_line() + self.out.emit("if (") + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.emit(") {\n") + next(tkn_iter) # Semi colon + # We guarantee this will deopt in real-world code + # via constants analysis. So just bail. + self.emit("ctx->done = true;\n") + self.emit("break;\n") + self.emit("}\n") + return not always_true(first_tkn) + + exit_if = deopt_if + + def error_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + next(tkn_iter) # RPAREN + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.out.emit(") {\n") + next(tkn_iter) # Semi colon + storage.clear_inputs("at ERROR_IF") + + self.out.emit("goto error;\n") + if not unconditional: + self.out.emit("}\n") + return not unconditional + + def write_uop( override: Uop | None, uop: Uop, @@ -175,13 +366,14 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n") if override: - emitter = OptimizerEmitter(out, {}) + emitter = OptimizerEmitter(out, {}, uop, stack.copy()) # No reference management of inputs needed. for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) out.start_line() storage.flush(out) + out.start_line() else: emit_default(out, uop, stack) out.start_line() From de0d014815667982c683adb2b2cc16ae2bfb3c82 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:23:33 +0300 Subject: [PATCH 707/989] gh-92266: Replace tabs with four spaces in Python files (#135983) --- .pre-commit-config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 822a8a9f4e5076..86410c46d1d707 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,13 @@ repos: name: Run Black on Tools/jit/ files: ^Tools/jit/ + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + - id: remove-tabs + types: [python] + exclude: ^Tools/c-analyzer/cpython/_parser.py + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: From 731f5b8ab3970e344bfbc4ff86df767a0795f0fc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 16:47:03 +0300 Subject: [PATCH 708/989] =?UTF-8?q?gh-136028:=20Fix=20parsing=20month=20na?= =?UTF-8?q?mes=20containing=20"=C4=B0"=20(U+0130)=20in=20strptime()=20(GH-?= =?UTF-8?q?136029)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This affects locales az_AZ, ber_DZ, ber_MA and crh_UA. --- Lib/_strptime.py | 12 ++++++++++-- Lib/test/test_strptime.py | 9 +++++++++ .../2025-06-27-13-34-28.gh-issue-136028.RY727g.rst | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 7ac6f36360cb69..cdc55e8daaffa6 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -42,6 +42,14 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') lzh_TW_alt_digits = ( # 〇:一:二:三:四:五:六:七:八:九 @@ -366,8 +374,8 @@ def __init__(self, locale_time=None): 'z': r"(?P([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index e52c46f8c58cce..0241e543cd7dde 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -337,6 +337,15 @@ def test_month_locale(self): self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + @run_with_locales('LC_TIME', 'az_AZ', 'ber_DZ', 'ber_MA', 'crh_UA') + def test_month_locale2(self): + # Test for month directives + # Month name contains 'İ' ('\u0130') + self.roundtrip('%B', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%b', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%B', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + self.roundtrip('%b', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + def test_day(self): # Test for day directives self.roundtrip('%d %Y', 2) diff --git a/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst new file mode 100644 index 00000000000000..9859df7cf6a69c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst @@ -0,0 +1,3 @@ +Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH +DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ, +ber_MA and crh_UA. From 065194c1a971b59547f1bb2cc64760c4bf0ee674 Mon Sep 17 00:00:00 2001 From: Will Childs-Klein Date: Fri, 27 Jun 2025 11:01:16 -0400 Subject: [PATCH 709/989] gh-135571: Guard `_hashlib` usage in `test_hashlib.py` (#135572) --- Lib/test/test_hashlib.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 7b378c45e71563..5bad483ae9dafc 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -279,7 +279,10 @@ def test_clinic_signature(self): with self.assertWarnsRegex(DeprecationWarning, DEPRECATED_STRING_PARAMETER): hashlib.new(digest_name, string=b'') - if self._hashlib: + # Make sure that _hashlib contains the constructor + # to test when using a combination of libcrypto and + # interned hash implementations. + if self._hashlib and digest_name in self._hashlib._constructors: self._hashlib.new(digest_name, b'') self._hashlib.new(digest_name, data=b'') with self.assertWarnsRegex(DeprecationWarning, @@ -333,7 +336,8 @@ def test_clinic_signature_errors(self): with self.subTest(digest_name, args=args, kwds=kwds): with self.assertRaisesRegex(TypeError, errmsg): hashlib.new(digest_name, *args, **kwds) - if self._hashlib: + if (self._hashlib and + digest_name in self._hashlib._constructors): with self.assertRaisesRegex(TypeError, errmsg): self._hashlib.new(digest_name, *args, **kwds) From 1e975aee28924afbd956183918cef278e09ce8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:12:21 +0200 Subject: [PATCH 710/989] gh-135755: rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros (#135847) Rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros to `_Py_HACL_CAN_COMPILE_VEC{128,256}`. These macros are private. --- Modules/blake2module.c | 66 +++++++++++++++++++------------------- Modules/hmacmodule.c | 21 ++++++------ PCbuild/pythoncore.vcxproj | 8 +++-- configure | 4 +-- configure.ac | 6 ++-- pyconfig.h.in | 12 +++---- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 9e279e11b518d2..163f238a4268d0 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,25 +43,25 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2s.h" #include "_hacl/Hacl_Hash_Blake2b.h" -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif @@ -88,7 +88,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State * blake2_get_state_from_type(PyTypeObject *module) { @@ -181,7 +181,7 @@ blake2module_init_cpu_features(Blake2State *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -191,7 +191,7 @@ blake2module_init_cpu_features(Blake2State *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else @@ -332,18 +332,18 @@ is_blake2s(blake2_impl impl) static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State *st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 return st->can_run_simd256 ? Blake2b_256 : Blake2b; #else return Blake2b; #endif } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 return st->can_run_simd128 ? Blake2s_128 : Blake2s; #else return Blake2s; @@ -357,10 +357,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -429,13 +429,13 @@ blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since // otherwise this results in an unresolved symbol at link-time. -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update, self->blake2b_256_state, buf, len); return; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update, self->blake2s_128_state, buf, len); @@ -555,12 +555,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -623,12 +623,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_MALLOC(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_MALLOC(Blake2s_Simd128, self->blake2s_128_state); break; @@ -756,12 +756,12 @@ blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_COPY(Blake2b_Simd256, blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_COPY(Blake2s_Simd128, blake2s_128_state); break; @@ -840,12 +840,12 @@ static uint8_t blake2_blake2b_compute_digest(Blake2Object *self, uint8_t *digest) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_digest( self->blake2b_256_state, digest); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_digest( self->blake2s_128_state, digest); @@ -923,11 +923,11 @@ static Hacl_Hash_Blake2b_index hacl_get_blake2_info(Blake2Object *self) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state); #endif @@ -975,12 +975,12 @@ py_blake2_clear(PyObject *op) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_FREE(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_FREE(Blake2s_Simd128, self->blake2s_128_state); break; diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index e7a5ccbb19b45c..95e400231bb65c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -361,7 +362,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -369,7 +370,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -1601,7 +1602,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1611,7 +1612,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 32a8f2dbad3d5e..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -419,8 +419,12 @@ - HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) - HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + + + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + diff --git a/configure b/configure index 43b36d9231e341..75ae1699a8e451 100755 --- a/configure +++ b/configure @@ -32633,7 +32633,7 @@ then : LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h # macOS universal2 builds *support* the -msse etc flags because they're @@ -32709,7 +32709,7 @@ then : LIBHACL_SIMD256_FLAGS="-mavx2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h # macOS universal2 builds *support* the -mavx2 compiler flag because it's diff --git a/configure.ac b/configure.ac index e77696e3a4e025..4da1ba78b54b0d 100644 --- a/configure.ac +++ b/configure.ac @@ -8026,7 +8026,8 @@ then AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ + HACL* library can compile SIMD128 implementations]) # macOS universal2 builds *support* the -msse etc flags because they're # available on x86_64. However, performance of the HACL SIMD128 implementation @@ -8057,7 +8058,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ + HACL* library can compile SIMD256 implementations]) # macOS universal2 builds *support* the -mavx2 compiler flag because it's # available on x86_64; but the HACL SIMD256 build then fails because the diff --git a/pyconfig.h.in b/pyconfig.h.in index d4f1da7fb10776..d7c496fccc682c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -50,12 +50,6 @@ /* Define if getpgrp() must be called as getpgrp(0). */ #undef GETPGRP_HAVE_ARG -/* HACL* library can compile SIMD128 implementations */ -#undef HACL_CAN_COMPILE_SIMD128 - -/* HACL* library can compile SIMD256 implementations */ -#undef HACL_CAN_COMPILE_SIMD256 - /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -2026,6 +2020,12 @@ /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX +/* HACL* library can compile SIMD128 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC128 + +/* HACL* library can compile SIMD256 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC256 + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 0141e7f9e66e5e5ac3949fb018dd6811cd630dca Mon Sep 17 00:00:00 2001 From: Lee Dogeon Date: Sat, 28 Jun 2025 00:15:11 +0900 Subject: [PATCH 711/989] gh-108765: fix comment about macro definitions in `_stat.c` post GH-108854 (#136027) --- Modules/_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_stat.c b/Modules/_stat.c index f11ca7d23b440d..1dabf2f6d5b07f 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -57,7 +57,7 @@ typedef unsigned short mode_t; * Only the names are defined by POSIX but not their value. All common file * types seems to have the same numeric value on all platforms, though. * - * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK + * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK */ #ifndef S_IFBLK @@ -86,7 +86,7 @@ typedef unsigned short mode_t; /* S_ISXXX() - * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR() + * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR() */ #ifndef S_ISBLK From 0e5d09613094f2331a6b1cdb83f98998702d4469 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 27 Jun 2025 08:20:51 -0700 Subject: [PATCH 712/989] GH-135904: Optimize the JIT's assembly control flow (GH-135905) --- ...-06-24-16-46-34.gh-issue-135904.78xfon.rst | 2 + Tools/jit/_optimizers.py | 319 ++++++++++++++++++ Tools/jit/_stencils.py | 67 +--- Tools/jit/_targets.py | 58 ++-- 4 files changed, 352 insertions(+), 94 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst create mode 100644 Tools/jit/_optimizers.py diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst new file mode 100644 index 00000000000000..ecbd8fda9a5e9d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst @@ -0,0 +1,2 @@ +Perform more aggressive control-flow optimizations on the machine code +templates emitted by the experimental JIT compiler. diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py new file mode 100644 index 00000000000000..1077e4106fdfbd --- /dev/null +++ b/Tools/jit/_optimizers.py @@ -0,0 +1,319 @@ +"""Low-level optimization of textual assembly.""" + +import dataclasses +import pathlib +import re +import typing + +# Same as saying "not string.startswith('')": +_RE_NEVER_MATCH = re.compile(r"(?!)") +# Dictionary mapping branch instructions to their inverted branch instructions. +# If a branch cannot be inverted, the value is None: +_X86_BRANCHES = { + # https://www.felixcloutier.com/x86/jcc + "ja": "jna", + "jae": "jnae", + "jb": "jnb", + "jbe": "jnbe", + "jc": "jnc", + "jcxz": None, + "je": "jne", + "jecxz": None, + "jg": "jng", + "jge": "jnge", + "jl": "jnl", + "jle": "jnle", + "jo": "jno", + "jp": "jnp", + "jpe": "jpo", + "jrcxz": None, + "js": "jns", + "jz": "jnz", + # https://www.felixcloutier.com/x86/loop:loopcc + "loop": None, + "loope": None, + "loopne": None, + "loopnz": None, + "loopz": None, +} +# Update with all of the inverted branches, too: +_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} + + +@dataclasses.dataclass +class _Block: + label: str | None = None + # Non-instruction lines like labels, directives, and comments: + noninstructions: list[str] = dataclasses.field(default_factory=list) + # Instruction lines: + instructions: list[str] = dataclasses.field(default_factory=list) + # If this block ends in a jump, where to? + target: typing.Self | None = None + # The next block in the linked list: + link: typing.Self | None = None + # Whether control flow can fall through to the linked block above: + fallthrough: bool = True + # Whether this block can eventually reach the next uop (_JIT_CONTINUE): + hot: bool = False + + def resolve(self) -> typing.Self: + """Find the first non-empty block reachable from this one.""" + block = self + while block.link and not block.instructions: + block = block.link + return block + + +@dataclasses.dataclass +class Optimizer: + """Several passes of analysis and optimization for textual assembly.""" + + path: pathlib.Path + _: dataclasses.KW_ONLY + # prefix used to mangle symbols on some platforms: + prefix: str = "" + # The first block in the linked list: + _root: _Block = dataclasses.field(init=False, default_factory=_Block) + _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) + # No groups: + _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( + r"\s*(?:\.|#|//|$)" + ) + # One group (label): + _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( + r'\s*(?P
"], "buffered text not properly split") + def test_change_character_data_handler_in_callback(self): + # Test that xmlparse_handler_setter() properly handles + # the special case "parser.CharacterDataHandler = None". + def handler(*args): + parser.CharacterDataHandler = None + + handler_wrapper = mock.Mock(wraps=handler) + parser = expat.ParserCreate() + parser.CharacterDataHandler = handler_wrapper + parser.Parse(b"12345 ", True) + handler_wrapper.assert_called_once() + self.assertIsNone(parser.CharacterDataHandler) + # Test handling of exception from callback: class HandlerExceptionTest(unittest.TestCase): @@ -595,7 +607,7 @@ def test_unchanged_size(self): def test_disabling_buffer(self): xml1 = b"" + b'a' * 512 xml2 = b'b' * 1024 - xml3 = b'c' * 1024 + b''; + xml3 = b'c' * 1024 + b'' parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler parser.buffer_text = 1 diff --git a/Misc/NEWS.d/next/C_API/2025-06-19-12-47-18.gh-issue-133157.1WA85f.rst b/Misc/NEWS.d/next/C_API/2025-06-19-12-47-18.gh-issue-133157.1WA85f.rst new file mode 100644 index 00000000000000..1b37d884e57273 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-06-19-12-47-18.gh-issue-133157.1WA85f.rst @@ -0,0 +1 @@ +Remove the private, undocumented macro :c:macro:`!_Py_NO_SANITIZE_UNDEFINED`. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index fa153d86543e99..c449dd848d10c6 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -98,7 +98,11 @@ typedef struct { #define CHARACTER_DATA_BUFFER_SIZE 8192 -typedef const void *xmlhandler; +// A generic function type for storage. +// To avoid undefined behaviors, a handler must be cast to the correct +// function type before it's called; see SETTER_WRAPPER below. +typedef void (*xmlhandler)(void); + typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler); struct HandlerInfo { @@ -110,9 +114,7 @@ struct HandlerInfo { static struct HandlerInfo handler_info[64]; -// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact -// handler API for each handler. -static inline void _Py_NO_SANITIZE_UNDEFINED +static inline void CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, XML_Parser xml_parser, xmlhandler xml_handler) { @@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure) elaborate system of handlers and state could remove the C handler more effectively. */ if (handlernum == CharacterData && self->in_callback) { - c_handler = noop_character_data_handler; + c_handler = (xmlhandler)noop_character_data_handler; } v = NULL; } @@ -2222,13 +2224,84 @@ clear_handlers(xmlparseobject *self, int initial) } } +/* To avoid undefined behaviors, a function must be *called* via a function + * pointer of the correct type. + * So, for each `XML_Set*` function, we define a wrapper that calls `XML_Set*` + * with its argument cast to the appropriate type. + */ + +typedef void (*parser_only)(void *); +typedef int (*not_standalone)(void *); +typedef void (*parser_and_data)(void *, const XML_Char *); +typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int); +typedef void (*parser_and_data_and_data)( + void *, const XML_Char *, const XML_Char *); +typedef void (*start_element)(void *, const XML_Char *, const XML_Char **); +typedef void (*element_decl)(void *, const XML_Char *, XML_Content *); +typedef void (*xml_decl)( + void *, const XML_Char *, const XML_Char *, int); +typedef void (*start_doctype_decl)( + void *, const XML_Char *, const XML_Char *, const XML_Char *, int); +typedef void (*notation_decl)( + void *, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*attlist_decl)( + void *, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, + int); +typedef void (*unparsed_entity_decl)( + void *, + const XML_Char *, const XML_Char *, + const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*entity_decl)( + void *, + const XML_Char *, int, + const XML_Char *, int, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef int (*external_entity_ref)( + XML_Parser, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); + +#define SETTER_WRAPPER(NAME, TYPE) \ + static inline void \ + pyexpat_Set ## NAME (XML_Parser parser, xmlhandler handler) \ + { \ + (void)XML_Set ## NAME (parser, (TYPE)handler); \ + } + +SETTER_WRAPPER(StartElementHandler, start_element) +SETTER_WRAPPER(EndElementHandler, parser_and_data) +SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data) +SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int) +SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl) +SETTER_WRAPPER(NotationDeclHandler, notation_decl) +SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data) +SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data) +SETTER_WRAPPER(CommentHandler, parser_and_data) +SETTER_WRAPPER(StartCdataSectionHandler, parser_only) +SETTER_WRAPPER(EndCdataSectionHandler, parser_only) +SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int) +SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int) +SETTER_WRAPPER(NotStandaloneHandler, not_standalone) +SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref) +SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl) +SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only) +SETTER_WRAPPER(EntityDeclHandler, entity_decl) +SETTER_WRAPPER(XmlDeclHandler, xml_decl) +SETTER_WRAPPER(ElementDeclHandler, element_decl) +SETTER_WRAPPER(AttlistDeclHandler, attlist_decl) +#if XML_COMBINED_VERSION >= 19504 +SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int) +#endif +#undef SETTER_WRAPPER + static struct HandlerInfo handler_info[] = { // The cast to `xmlhandlersetter` is needed as the signature of XML // handler functions is not compatible with `xmlhandlersetter` since // their second parameter is narrower than a `const void *`. #define HANDLER_INFO(name) \ - {#name, (xmlhandlersetter)XML_Set##name, my_##name}, + {#name, (xmlhandlersetter)pyexpat_Set##name, (xmlhandler)my_##name}, HANDLER_INFO(StartElementHandler) HANDLER_INFO(EndElementHandler) From fe119a08177feea27611450b0bb3901e330a2d58 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 1 Jul 2025 10:57:42 +0200 Subject: [PATCH 740/989] gh-87135: threading.Lock: Raise rather than hang on Python finalization (GH-135991) After Python finalization gets to the point where no other thread can attach thread state, attempting to acquire a Python lock must hang. Raise PythonFinalizationError instead of hanging. --- Doc/library/exceptions.rst | 9 ++- Include/internal/pycore_lock.h | 5 ++ Lib/test/test_threading.py | 55 +++++++++++++++++++ ...5-06-27-09-26-04.gh-issue-87135.33z0UW.rst | 3 + Modules/_threadmodule.c | 18 ++++-- Python/lock.c | 12 ++++ 6 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-27-09-26-04.gh-issue-87135.33z0UW.rst diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 9806ae80905ca0..c09e1615a5b733 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -429,7 +429,9 @@ The following exceptions are the exceptions that are usually raised. * Creating a new Python thread. * :meth:`Joining ` a running daemon thread. - * :func:`os.fork`. + * :func:`os.fork`, + * acquiring a lock such as :class:`threading.Lock`, when it is known that + the operation would otherwise deadlock. See also the :func:`sys.is_finalizing` function. @@ -440,6 +442,11 @@ The following exceptions are the exceptions that are usually raised. :meth:`threading.Thread.join` can now raise this exception. + .. versionchanged:: next + + This exception may be raised when acquiring :meth:`threading.Lock` + or :meth:`threading.RLock`. + .. exception:: RecursionError This exception is derived from :exc:`RuntimeError`. It is raised when the diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 32b60cc33a21f1..bd6011b60ac09a 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -51,6 +51,11 @@ typedef enum _PyLockFlags { // Fail if interrupted by a signal while waiting on the lock. _PY_FAIL_IF_INTERRUPTED = 4, + + // Locking & unlocking this lock requires attached thread state. + // If locking returns PY_LOCK_FAILURE, a Python exception *may* be raised. + // (Intended for use with _PY_LOCK_HANDLE_SIGNALS and _PY_LOCK_DETACH.) + _PY_LOCK_PYTHONLOCK = 8, } _PyLockFlags; // Lock a mutex with an optional timeout and additional options. See diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 13b55d0f0a2e73..00a3037c3e1e01 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1247,6 +1247,61 @@ def __del__(self): self.assertEqual(err, b"") self.assertIn(b"all clear", out) + @support.subTests('lock_class_name', ['Lock', 'RLock']) + def test_acquire_daemon_thread_lock_in_finalization(self, lock_class_name): + # gh-123940: Py_Finalize() prevents other threads from running Python + # code (and so, releasing locks), so acquiring a locked lock can not + # succeed. + # We raise an exception rather than hang. + code = textwrap.dedent(f""" + import threading + import time + + thread_started_event = threading.Event() + + lock = threading.{lock_class_name}() + def loop(): + if {lock_class_name!r} == 'RLock': + lock.acquire() + with lock: + thread_started_event.set() + while True: + time.sleep(1) + + uncontested_lock = threading.{lock_class_name}() + + class Cycle: + def __init__(self): + self.self_ref = self + self.thr = threading.Thread( + target=loop, daemon=True) + self.thr.start() + thread_started_event.wait() + + def __del__(self): + assert self.thr.is_alive() + + # We *can* acquire an unlocked lock + uncontested_lock.acquire() + if {lock_class_name!r} == 'RLock': + uncontested_lock.acquire() + + # Acquiring a locked one fails + try: + lock.acquire() + except PythonFinalizationError: + assert self.thr.is_alive() + print('got the correct exception!') + + # Cycle holds a reference to itself, which ensures it is + # cleaned up during the GC that runs after daemon threads + # have been forced to exit during finalization. + Cycle() + """) + rc, out, err = assert_python_ok("-c", code) + self.assertEqual(err, b"") + self.assertIn(b"got the correct exception", out) + def test_start_new_thread_failed(self): # gh-109746: if Python fails to start newly created thread # due to failure of underlying PyThread_start_new_thread() call, diff --git a/Misc/NEWS.d/next/Library/2025-06-27-09-26-04.gh-issue-87135.33z0UW.rst b/Misc/NEWS.d/next/Library/2025-06-27-09-26-04.gh-issue-87135.33z0UW.rst new file mode 100644 index 00000000000000..4b6bc74cad87a4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-27-09-26-04.gh-issue-87135.33z0UW.rst @@ -0,0 +1,3 @@ +Acquiring a :class:`threading.Lock` or :class:`threading.RLock` at interpreter +shutdown will raise :exc:`PythonFinalizationError` if Python can determine +that it would otherwise deadlock. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index b6ccca943f2085..8886a9d6bd0c8d 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -834,9 +834,14 @@ lock_PyThread_acquire_lock(PyObject *op, PyObject *args, PyObject *kwds) return NULL; } - PyLockStatus r = _PyMutex_LockTimed(&self->lock, timeout, - _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); + PyLockStatus r = _PyMutex_LockTimed( + &self->lock, timeout, + _PY_LOCK_PYTHONLOCK | _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); if (r == PY_LOCK_INTR) { + assert(PyErr_Occurred()); + return NULL; + } + if (r == PY_LOCK_FAILURE && PyErr_Occurred()) { return NULL; } @@ -1054,9 +1059,14 @@ rlock_acquire(PyObject *op, PyObject *args, PyObject *kwds) return NULL; } - PyLockStatus r = _PyRecursiveMutex_LockTimed(&self->lock, timeout, - _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); + PyLockStatus r = _PyRecursiveMutex_LockTimed( + &self->lock, timeout, + _PY_LOCK_PYTHONLOCK | _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); if (r == PY_LOCK_INTR) { + assert(PyErr_Occurred()); + return NULL; + } + if (r == PY_LOCK_FAILURE && PyErr_Occurred()) { return NULL; } diff --git a/Python/lock.c b/Python/lock.c index ea6ac00bfeccb4..eb09019e0a236d 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -95,6 +95,18 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) if (timeout == 0) { return PY_LOCK_FAILURE; } + if ((flags & _PY_LOCK_PYTHONLOCK) && Py_IsFinalizing()) { + // At this phase of runtime shutdown, only the finalization thread + // can have attached thread state; others hang if they try + // attaching. And since operations on this lock requires attached + // thread state (_PY_LOCK_PYTHONLOCK), the finalization thread is + // running this code, and no other thread can unlock. + // Raise rather than hang. (_PY_LOCK_PYTHONLOCK allows raising + // exceptons.) + PyErr_SetString(PyExc_PythonFinalizationError, + "cannot acquire lock at interpreter finalization"); + return PY_LOCK_FAILURE; + } uint8_t newv = v; if (!(v & _Py_HAS_PARKED)) { From 31b56df3bb3c5e3ced10a0ad1eaed43e75f36f22 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:30:51 +0100 Subject: [PATCH 741/989] gh-133447: Update `sqlite3` What's New 3.15 entry (GH-136079) --- Doc/whatsnew/3.15.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e58b4250f356c2..fb3f6312798753 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -140,8 +140,15 @@ shelve sqlite3 ------- -* Support SQL keyword completion in the :mod:`sqlite3` command-line interface. - (Contributed by Long Tan in :gh:`133393`.) +* The :ref:`command-line interface ` has several new features: + + * SQL keyword completion on . + (Contributed by Long Tan in :gh:`133393`.) + + * Prompts, error messages, and help text are now colored. + This is enabled by default, see :ref:`using-on-controlling-color` for + details. + (Contributed by Stan Ulbrych and Łukasz Langa in :gh:`133461`) ssl From 0e19db653dfa1a6e750e9cede1f6922e5fd1e808 Mon Sep 17 00:00:00 2001 From: heliang666s <147408835+heliang666s@users.noreply.github.com> Date: Tue, 1 Jul 2025 19:50:11 +0800 Subject: [PATCH 742/989] gh-135836: Fix `IndexError` in `asyncio.create_connection` with empty exceptions list (#135845) Co-authored-by: Kumar Aditya --- Lib/asyncio/base_events.py | 5 +++- Lib/test/test_asyncio/test_base_events.py | 30 +++++++++++++++++++ ...-06-23-11-04-25.gh-issue-135836.-C-c4v.rst | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-23-11-04-25.gh-issue-135836.-C-c4v.rst diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 04fb961e9985e4..2ff9e4017bb245 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -1161,7 +1161,7 @@ async def create_connection( raise ExceptionGroup("create_connection failed", exceptions) if len(exceptions) == 1: raise exceptions[0] - else: + elif exceptions: # If they all have the same str(), raise one. model = str(exceptions[0]) if all(str(exc) == model for exc in exceptions): @@ -1170,6 +1170,9 @@ async def create_connection( # the various error messages. raise OSError('Multiple exceptions: {}'.format( ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') finally: exceptions = None diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index 2ca5c4c6719c41..bb9f366fc411aa 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -1190,6 +1190,36 @@ def getaddrinfo(*args, **kw): self.loop.run_until_complete(coro) self.assertTrue(sock.close.called) + @patch_socket + def test_create_connection_happy_eyeballs_empty_exceptions(self, m_socket): + # See gh-135836: Fix IndexError when Happy Eyeballs algorithm + # results in empty exceptions list + + async def getaddrinfo(*args, **kw): + return [(socket.AF_INET, socket.SOCK_STREAM, 0, '', ('127.0.0.1', 80)), + (socket.AF_INET6, socket.SOCK_STREAM, 0, '', ('::1', 80))] + + def getaddrinfo_task(*args, **kwds): + return self.loop.create_task(getaddrinfo(*args, **kwds)) + + self.loop.getaddrinfo = getaddrinfo_task + + # Mock staggered_race to return empty exceptions list + # This simulates the scenario where Happy Eyeballs algorithm + # cancels all attempts but doesn't properly collect exceptions + with mock.patch('asyncio.staggered.staggered_race') as mock_staggered: + # Return (None, []) - no winner, empty exceptions list + async def mock_race(coro_fns, delay, loop): + return None, [] + mock_staggered.side_effect = mock_race + + coro = self.loop.create_connection( + MyProto, 'example.com', 80, happy_eyeballs_delay=0.1) + + # Should raise TimeoutError instead of IndexError + with self.assertRaisesRegex(TimeoutError, "create_connection failed"): + self.loop.run_until_complete(coro) + def test_create_connection_host_port_sock(self): coro = self.loop.create_connection( MyProto, 'example.com', 80, sock=object()) diff --git a/Misc/NEWS.d/next/Library/2025-06-23-11-04-25.gh-issue-135836.-C-c4v.rst b/Misc/NEWS.d/next/Library/2025-06-23-11-04-25.gh-issue-135836.-C-c4v.rst new file mode 100644 index 00000000000000..f93c9faee5863a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-23-11-04-25.gh-issue-135836.-C-c4v.rst @@ -0,0 +1 @@ +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could occur when the Happy Eyeballs algorithm resulted in an empty exceptions list during connection attempts. From 28940e8e4884eb3444a8ea15a5634f76b154d7ed Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 1 Jul 2025 15:18:17 +0200 Subject: [PATCH 743/989] gh-130396: Move PYOS_LOG2_STACK_MARGIN to internal headers (#135928) Move PYOS_LOG2_STACK_MARGIN, PYOS_STACK_MARGIN, PYOS_STACK_MARGIN_BYTES and PYOS_STACK_MARGIN_SHIFT macros to pycore_pythonrun.h internal header. Add underscore (_) prefix to the names to make them private. Rename _PYOS to _PyOS. --- Include/internal/pycore_pystate.h | 3 ++- Include/internal/pycore_pythonrun.h | 22 ++++++++++++++++++++++ Include/pythonrun.h | 25 ++----------------------- Modules/_testinternalcapi.c | 2 +- Python/ceval.c | 14 +++++++------- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 633e5cf77db918..ea3dfbd2eef9c1 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_pythonrun.h" // _PyOS_STACK_MARGIN_SHIFT #include "pycore_typedefs.h" // _PyRuntimeState #include "pycore_tstate.h" @@ -325,7 +326,7 @@ _Py_RecursionLimit_GetMargin(PyThreadState *tstate) _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); intptr_t here_addr = _Py_get_machine_stack_pointer(); - return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, PYOS_STACK_MARGIN_SHIFT); + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, _PyOS_STACK_MARGIN_SHIFT); } #ifdef __cplusplus diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index 7daed1326af8d5..c2832098ddb3e7 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -33,6 +33,28 @@ extern const char* _Py_SourceAsString( PyCompilerFlags *cf, PyObject **cmd_copy); + +/* Stack size, in "pointers". This must be large enough, so + * no two calls to check recursion depth are more than this far + * apart. In practice, that means it must be larger than the C + * stack consumption of PyEval_EvalDefault */ +#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) +# define _PyOS_LOG2_STACK_MARGIN 12 +#elif defined(Py_DEBUG) && defined(WIN32) +# define _PyOS_LOG2_STACK_MARGIN 12 +#else +# define _PyOS_LOG2_STACK_MARGIN 11 +#endif +#define _PyOS_STACK_MARGIN (1 << _PyOS_LOG2_STACK_MARGIN) +#define _PyOS_STACK_MARGIN_BYTES (_PyOS_STACK_MARGIN * sizeof(void *)) + +#if SIZEOF_VOID_P == 8 +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 3) +#else +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2) +#endif + + #ifdef __cplusplus } #endif diff --git a/Include/pythonrun.h b/Include/pythonrun.h index 716c4ab3c81cae..92b50aa807bf70 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -21,36 +21,15 @@ PyAPI_FUNC(void) PyErr_DisplayException(PyObject *); /* Stuff with no proper home (yet) */ PyAPI_DATA(int) (*PyOS_InputHook)(void); -/* Stack size, in "pointers". This must be large enough, so - * no two calls to check recursion depth are more than this far - * apart. In practice, that means it must be larger than the C - * stack consumption of PyEval_EvalDefault */ -#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) -# define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(Py_DEBUG) && defined(WIN32) -# define PYOS_LOG2_STACK_MARGIN 12 -#else -# define PYOS_LOG2_STACK_MARGIN 11 -#endif -#define PYOS_STACK_MARGIN (1 << PYOS_LOG2_STACK_MARGIN) -#define PYOS_STACK_MARGIN_BYTES (PYOS_STACK_MARGIN * sizeof(void *)) - -#if SIZEOF_VOID_P == 8 -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 3) -#else -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 2) -#endif - - #if defined(WIN32) -#define USE_STACKCHECK +# define USE_STACKCHECK #endif - #ifdef USE_STACKCHECK /* Check that we aren't overflowing our stack */ PyAPI_FUNC(int) PyOS_CheckStack(void); #endif + #ifndef Py_LIMITED_API # define Py_CPYTHON_PYTHONRUN_H # include "cpython/pythonrun.h" diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index fdf22a0c994d3a..8027f0015c7409 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -121,7 +121,7 @@ get_c_recursion_remaining(PyObject *self, PyObject *Py_UNUSED(args)) PyThreadState *tstate = _PyThreadState_GET(); uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - int remaining = (int)((here_addr - _tstate->c_stack_soft_limit)/PYOS_STACK_MARGIN_BYTES * 50); + int remaining = (int)((here_addr - _tstate->c_stack_soft_limit) / _PyOS_STACK_MARGIN_BYTES * 50); return PyLong_FromLong(remaining); } diff --git a/Python/ceval.c b/Python/ceval.c index d1de4875656fd3..50665defd382a2 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -346,13 +346,13 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (here_addr > _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES) { + if (here_addr > _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES) { return 0; } if (_tstate->c_stack_hard_limit == 0) { _Py_InitializeRecursionLimits(tstate); } - return here_addr <= _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES; + return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES; } void @@ -448,8 +448,8 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) _tstate->c_stack_top = (uintptr_t)high; ULONG guarantee = 0; SetThreadStackGuarantee(&guarantee); - _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + PYOS_STACK_MARGIN_BYTES; - _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + PYOS_STACK_MARGIN_BYTES; + _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES; + _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES; #else uintptr_t here_addr = _Py_get_machine_stack_pointer(); # if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && !defined(__NetBSD__) @@ -469,9 +469,9 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) // Thread sanitizer crashes if we use a bit more than half the stack. _tstate->c_stack_soft_limit = base + (stack_size / 2); #else - _tstate->c_stack_soft_limit = base + PYOS_STACK_MARGIN_BYTES * 2; + _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; #endif - _tstate->c_stack_hard_limit = base + PYOS_STACK_MARGIN_BYTES; + _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; assert(_tstate->c_stack_soft_limit < here_addr); assert(here_addr < _tstate->c_stack_top); return; @@ -479,7 +479,7 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) # endif _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096); _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE; - _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + PYOS_STACK_MARGIN_BYTES); + _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES); #endif } From 9c0cb5beb893284c5d49f947bbd81dfe081a5d71 Mon Sep 17 00:00:00 2001 From: Vladyslav Lazoryk <80263725+lazorikv@users.noreply.github.com> Date: Tue, 1 Jul 2025 16:23:48 +0300 Subject: [PATCH 744/989] gh-136169: Update parameter name in fractions.from_float method (#136172) Update parameter name in fractions.from_float method --- Doc/library/fractions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/fractions.rst b/Doc/library/fractions.rst index fc7f9a6301a915..392b6d40e861fb 100644 --- a/Doc/library/fractions.rst +++ b/Doc/library/fractions.rst @@ -142,7 +142,7 @@ another rational number, or from a string. .. versionadded:: 3.12 - .. classmethod:: from_float(flt) + .. classmethod:: from_float(f) Alternative constructor which only accepts instances of :class:`float` or :class:`numbers.Integral`. Beware that From 12ce16bc134a602d2ac8acde86ae69f70183cb9f Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Tue, 1 Jul 2025 07:55:24 -0700 Subject: [PATCH 745/989] gh-87298: Add tests for find_in_strong_cache() bug in _zoneinfo (GH-24829) Co-authored-by: Paul Ganssle Co-authored-by: Serhiy Storchaka --- Lib/test/test_zoneinfo/test_zoneinfo.py | 43 +++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index f313e394f49e9b..44e87e71c8ee7b 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -58,6 +58,10 @@ def tearDownModule(): shutil.rmtree(TEMP_DIR) +class CustomError(Exception): + pass + + class TzPathUserMixin: """ Adds a setUp() and tearDown() to make TZPATH manipulations thread-safe. @@ -404,6 +408,25 @@ def test_time_fixed_offset(self): self.assertEqual(t.utcoffset(), offset.utcoffset) self.assertEqual(t.dst(), offset.dst) + def test_cache_exception(self): + class Incomparable(str): + eq_called = False + def __eq__(self, other): + self.eq_called = True + raise CustomError + __hash__ = str.__hash__ + + key = "America/Los_Angeles" + tz1 = self.klass(key) + key = Incomparable(key) + try: + tz2 = self.klass(key) + except CustomError: + self.assertTrue(key.eq_called) + else: + self.assertFalse(key.eq_called) + self.assertIs(tz2, tz1) + class CZoneInfoTest(ZoneInfoTest): module = c_zoneinfo @@ -1507,6 +1530,26 @@ def test_clear_cache_two_keys(self): self.assertIsNot(dub0, dub1) self.assertIs(tok0, tok1) + def test_clear_cache_refleak(self): + class Stringy(str): + allow_comparisons = True + def __eq__(self, other): + if not self.allow_comparisons: + raise CustomError + return super().__eq__(other) + __hash__ = str.__hash__ + + key = Stringy("America/Los_Angeles") + self.klass(key) + key.allow_comparisons = False + try: + # Note: This is try/except rather than assertRaises because + # there is no guarantee that the key is even still in the cache, + # or that the key for the cache is the original `key` object. + self.klass.clear_cache(only_keys="America/Los_Angeles") + except CustomError: + pass + class CZoneInfoCacheTest(ZoneInfoCacheTest): module = c_zoneinfo From 93809a918f3a2853e8a191b84ad844c722bee2f5 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 1 Jul 2025 16:31:07 +0100 Subject: [PATCH 746/989] gh-105456: Remove 3 deprecated `sre_*` modules (#135994) --- Doc/deprecations/pending-removal-in-3.15.rst | 2 ++ .../pending-removal-in-future.rst | 2 -- Doc/whatsnew/3.15.rst | 7 +++++ Lib/sre_compile.py | 7 ----- Lib/sre_constants.py | 7 ----- Lib/sre_parse.py | 7 ----- Lib/test/test_pyclbr.py | 4 --- Lib/test/test_re.py | 28 ------------------- ...-06-26-17-19-36.gh-issue-105456.eR9oHB.rst | 2 ++ Python/stdlib_module_names.h | 3 -- 10 files changed, 11 insertions(+), 58 deletions(-) delete mode 100644 Lib/sre_compile.py delete mode 100644 Lib/sre_constants.py delete mode 100644 Lib/sre_parse.py create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-17-19-36.gh-issue-105456.eR9oHB.rst diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index a76d06cce1278a..c5ca599bb04a6f 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -97,6 +97,8 @@ Pending removal in Python 3.15 After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. +* :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. + * :mod:`wave`: * The ``getmark()``, ``setmark()`` and ``getmarkers()`` methods of diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 4c4a368baca955..edb672ed8ad9a2 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -89,8 +89,6 @@ although there is currently no date scheduled for their removal. underscore. (Contributed by Serhiy Storchaka in :gh:`91760`.) -* :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. - * :mod:`shutil`: :func:`~shutil.rmtree`'s *onerror* parameter is deprecated in Python 3.12; use the *onexc* parameter instead. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index fb3f6312798753..efc5777db884c1 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -252,6 +252,13 @@ platform (Contributed by Alexey Makridenko in :gh:`133604`.) +sre_* +----- + +* Removed :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. + (Contributed by Stan Ulbrych in :gh:`135994`.) + + sysconfig --------- diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py deleted file mode 100644 index f9da61e6487869..00000000000000 --- a/Lib/sre_compile.py +++ /dev/null @@ -1,7 +0,0 @@ -import warnings -warnings.warn(f"module {__name__!r} is deprecated", - DeprecationWarning, - stacklevel=2) - -from re import _compiler as _ -globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py deleted file mode 100644 index fa09d044292965..00000000000000 --- a/Lib/sre_constants.py +++ /dev/null @@ -1,7 +0,0 @@ -import warnings -warnings.warn(f"module {__name__!r} is deprecated", - DeprecationWarning, - stacklevel=2) - -from re import _constants as _ -globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py deleted file mode 100644 index 25a3f557d44c61..00000000000000 --- a/Lib/sre_parse.py +++ /dev/null @@ -1,7 +0,0 @@ -import warnings -warnings.warn(f"module {__name__!r} is deprecated", - DeprecationWarning, - stacklevel=2) - -from re import _parser as _ -globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index 3e7b2cd0dc9912..bce68e6cd7a57e 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -11,7 +11,6 @@ import pyclbr from unittest import TestCase, main as unittest_main from test.test_importlib import util as test_importlib_util -import warnings StaticMethodType = type(staticmethod(lambda: None)) @@ -246,9 +245,6 @@ def test_others(self): # These were once some of the longest modules. cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator cm('pickle', ignore=('partial', 'PickleBuffer')) - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property with temporary_main_spec(): cm( 'pdb', diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e9128ac1d9762d..993652d2e882cc 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -5,7 +5,6 @@ import locale import re import string -import sys import unittest import warnings from re import Scanner @@ -2927,33 +2926,6 @@ def test_disallow_instantiation(self): pat = re.compile("") check_disallow_instantiation(self, type(pat.scanner(""))) - def test_deprecated_modules(self): - deprecated = { - 'sre_compile': ['compile', 'error', - 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', - '_compile_info'], - 'sre_constants': ['error', 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', - '_NamedIntConstant'], - 'sre_parse': ['SubPattern', 'parse', - 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', - '_parse_sub'], - } - for name in deprecated: - with self.subTest(module=name): - sys.modules.pop(name, None) - with self.assertWarns(DeprecationWarning) as w: - __import__(name) - self.assertEqual(str(w.warning), - f"module {name!r} is deprecated") - self.assertEqual(w.filename, __file__) - self.assertIn(name, sys.modules) - mod = sys.modules[name] - self.assertEqual(mod.__name__, name) - self.assertEqual(mod.__package__, '') - for attr in deprecated[name]: - self.assertHasAttr(mod, attr) - del sys.modules[name] - @cpython_only def test_case_helpers(self): import _sre diff --git a/Misc/NEWS.d/next/Library/2025-06-26-17-19-36.gh-issue-105456.eR9oHB.rst b/Misc/NEWS.d/next/Library/2025-06-26-17-19-36.gh-issue-105456.eR9oHB.rst new file mode 100644 index 00000000000000..772403a240aa00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-17-19-36.gh-issue-105456.eR9oHB.rst @@ -0,0 +1,2 @@ +Removed :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` +modules. diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 56e349a544c079..63e4599c31efc3 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -245,9 +245,6 @@ static const char* _Py_stdlib_module_names[] = { "socket", "socketserver", "sqlite3", -"sre_compile", -"sre_constants", -"sre_parse", "ssl", "stat", "statistics", From 17cf0a343b13b6f38a8ecf28b0dc892a4b456ca7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 1 Jul 2025 18:50:51 +0300 Subject: [PATCH 747/989] gh-115119: Remove implicit fallback to the bundled libmpdec (#134078) Co-authored-by: Victor Stinner Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/posix-deps-apt.sh | 7 +++ Doc/whatsnew/3.15.rst | 5 ++ ...-05-16-07-46-06.gh-issue-115119.ALBgS_.rst | 4 ++ configure | 46 +++++++++---------- configure.ac | 40 ++++++++-------- 5 files changed, 57 insertions(+), 45 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-05-16-07-46-06.gh-issue-115119.ALBgS_.rst diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 7773222af5d26f..44e6a9ce2d0cd1 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -25,3 +25,10 @@ apt-get -yq install \ uuid-dev \ xvfb \ zlib1g-dev + +# Workaround missing libmpdec-dev on ubuntu 24.04: +# https://launchpad.net/~ondrej/+archive/ubuntu/php +# https://deb.sury.org/ +sudo add-apt-repository ppa:ondrej/php +apt-get update +apt-get -yq install libmpdec-dev diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index efc5777db884c1..df636dc9051f0d 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -310,6 +310,11 @@ that may require changes to your code. Build changes ============= +* Removed implicit fallback to the bundled copy of the ``libmpdec`` library. + Now this should be explicitly enabled with :option:`--with-system-libmpdec` + set to ``no`` or with :option:`!--without-system-libmpdec`. + (Contributed by Sergey B Kirpichev in :gh:`115119`.) + C API changes ============= diff --git a/Misc/NEWS.d/next/Build/2025-05-16-07-46-06.gh-issue-115119.ALBgS_.rst b/Misc/NEWS.d/next/Build/2025-05-16-07-46-06.gh-issue-115119.ALBgS_.rst new file mode 100644 index 00000000000000..8c2d15a3228fcf --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-05-16-07-46-06.gh-issue-115119.ALBgS_.rst @@ -0,0 +1,4 @@ +Removed implicit fallback to the bundled copy of the ``libmpdec`` library. +Now this should be explicitly enabled via :option:`--with-system-libmpdec` +set to ``no`` or :option:`!--without-system-libmpdec`. Patch by Sergey +B Kirpichev. diff --git a/configure b/configure index 75ae1699a8e451..3a103c39b62076 100755 --- a/configure +++ b/configure @@ -15750,10 +15750,18 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-system-libmpdec" >&5 printf %s "checking for --with-system-libmpdec... " >&6; } + # Check whether --with-system_libmpdec was given. if test ${with_system_libmpdec+y} then : - withval=$with_system_libmpdec; + withval=$with_system_libmpdec; if test "x$with_system_libmpdec" = xno +then : + LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" + LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" + LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" + have_mpdec=yes + with_system_libmpdec=no +fi else case e in #( e) with_system_libmpdec="yes" ;; esac @@ -15762,8 +15770,6 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_system_libmpdec" >&5 printf "%s\n" "$with_system_libmpdec" >&6; } - - if test "x$with_system_libmpdec" = xyes then : @@ -15841,13 +15847,6 @@ else printf "%s\n" "yes" >&6; } fi -else case e in #( - e) LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" - LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" - LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" - have_mpdec=yes - with_system_libmpdec=no ;; -esac fi if test "x$with_system_libmpdec" = xyes @@ -15894,21 +15893,6 @@ LDFLAGS=$save_LDFLAGS LIBS=$save_LIBS -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; consider using a system installed mpdecimal library." >&5 -printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; consider using a system installed mpdecimal library." >&2;} ;; -esac -fi - -if test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdecimal found; falling back to bundled libmpdecimal (deprecated and scheduled for removal in Python 3.15)" >&5 -printf "%s\n" "$as_me: WARNING: no system libmpdecimal found; falling back to bundled libmpdecimal (deprecated and scheduled for removal in Python 3.15)" >&2;} - LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" - LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" - LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" - have_mpdec=yes - with_system_libmpdec=no fi # Disable forced inlining in debug builds, see GH-94847 @@ -33194,6 +33178,18 @@ fi printf "%s\n" "$py_cv_module__decimal" >&6; } +if test "x$with_system_libmpdec" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&5 +printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&2;} +fi +if test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdecimal found; falling back to pure-Python version for the decimal module" >&5 +printf "%s\n" "$as_me: WARNING: no system libmpdecimal found; falling back to pure-Python version for the decimal module" >&2;} +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for stdlib extension module _dbm" >&5 printf %s "checking for stdlib extension module _dbm... " >&6; } if test "$py_cv_module__dbm" != "n/a" diff --git a/configure.ac b/configure.ac index 4da1ba78b54b0d..761d1dbd45cd8e 100644 --- a/configure.ac +++ b/configure.ac @@ -4169,31 +4169,30 @@ fi # Check for use of the system libmpdec library AC_MSG_CHECKING([for --with-system-libmpdec]) +AC_DEFUN([USE_BUNDLED_LIBMPDEC], + [LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" + LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" + LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" + have_mpdec=yes + with_system_libmpdec=no]) AC_ARG_WITH( [system_libmpdec], [AS_HELP_STRING( [--with-system-libmpdec], [build _decimal module using an installed mpdecimal library, see Doc/library/decimal.rst (default is yes)] )], - [], + [AS_IF([test "x$with_system_libmpdec" = xno], + [USE_BUNDLED_LIBMPDEC()])], [with_system_libmpdec="yes"]) AC_MSG_RESULT([$with_system_libmpdec]) -AC_DEFUN([USE_BUNDLED_LIBMPDEC], - [LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" - LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" - LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" - have_mpdec=yes - with_system_libmpdec=no]) - AS_VAR_IF( [with_system_libmpdec], [yes], [PKG_CHECK_MODULES( [LIBMPDEC], [libmpdec >= 2.5.0], [], [LIBMPDEC_CFLAGS=${LIBMPDEC_CFLAGS-""} LIBMPDEC_LIBS=${LIBMPDEC_LIBS-"-lmpdec -lm"} - LIBMPDEC_INTERNAL=])], - [USE_BUNDLED_LIBMPDEC()]) + LIBMPDEC_INTERNAL=])]) AS_VAR_IF([with_system_libmpdec], [yes], [WITH_SAVE_ENV([ @@ -4209,16 +4208,7 @@ AS_VAR_IF([with_system_libmpdec], [yes], ], [const char *x = mpd_version();])], [have_mpdec=yes], [have_mpdec=no]) - ])], - [AC_MSG_WARN([m4_normalize([ - the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; - consider using a system installed mpdecimal library.])])]) - -AS_IF([test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no"], - [AC_MSG_WARN([m4_normalize([ - no system libmpdecimal found; falling back to bundled libmpdecimal - (deprecated and scheduled for removal in Python 3.15)])]) - USE_BUNDLED_LIBMPDEC()]) + ])]) # Disable forced inlining in debug builds, see GH-94847 AS_VAR_IF( @@ -8136,6 +8126,16 @@ PY_STDLIB_MOD([_curses_panel], PY_STDLIB_MOD([_decimal], [], [test "$have_mpdec" = "yes"], [$LIBMPDEC_CFLAGS], [$LIBMPDEC_LIBS]) + +AS_VAR_IF([with_system_libmpdec], [no], + [AC_MSG_WARN([m4_normalize([ + the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; + consider using a system installed mpdecimal library.])])]) +AS_IF([test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no"], + [AC_MSG_WARN([m4_normalize([ + no system libmpdecimal found; falling back to pure-Python version + for the decimal module])])]) + PY_STDLIB_MOD([_dbm], [test -n "$with_dbmliborder"], [test "$have_dbm" != "no"], [$DBM_CFLAGS], [$DBM_LIBS]) From e0d6500b2d9a08beb7b607b846d1eeaa26706667 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 2 Jul 2025 01:20:46 +0800 Subject: [PATCH 748/989] gh-136125: Use `_PyObject_GetMethodStackRef` for `LOAD_ATTR` (GH-136127) --- Include/internal/pycore_object.h | 2 +- Python/bytecodes.c | 15 +++++++-------- Python/executor_cases.c.h | 17 ++++++++--------- Python/generated_cases.c.h | 17 ++++++++--------- Python/optimizer_bytecodes.c | 4 ++-- Python/optimizer_cases.c.h | 6 +++--- 6 files changed, 29 insertions(+), 32 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 50807e68e9a70c..d0fe1f63250eaa 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -918,7 +918,7 @@ extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, extern unsigned int _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out); -extern int _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, +PyAPI_FUNC(int) _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, PyObject *name, _PyStackRef *method); // Cache the provided init method in the specialization cache of type if the diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1a5a9ff13a23a5..a5b74d88d7d4fe 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2327,19 +2327,18 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION_FT */ } - op(_LOAD_ATTR, (owner -- attr, self_or_null[oparg&1])) { + op(_LOAD_ATTR, (owner -- attr[1], self_or_null[oparg&1])) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); - PyObject *attr_o; if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ - attr_o = NULL; - int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + *attr = PyStackRef_NULL; + int is_meth = _PyObject_GetMethodStackRef(tstate, PyStackRef_AsPyObjectBorrow(owner), name, attr); if (is_meth) { /* We can bypass temporary bound method object. meth is unbound method and obj is self. meth | self | arg1 | ... | argN */ - assert(attr_o != NULL); // No errors on this branch + assert(!PyStackRef_IsNull(*attr)); // No errors on this branch self_or_null[0] = owner; // Transfer ownership DEAD(owner); } @@ -2351,17 +2350,17 @@ dummy_func( meth | NULL | arg1 | ... | argN */ PyStackRef_CLOSE(owner); - ERROR_IF(attr_o == NULL); + ERROR_IF(PyStackRef_IsNull(*attr)); self_or_null[0] = PyStackRef_NULL; } } else { /* Classic, pushes one value. */ - attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); + PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); PyStackRef_CLOSE(owner); ERROR_IF(attr_o == NULL); + *attr = PyStackRef_FromPyObjectSteal(attr_o); } - attr = PyStackRef_FromPyObjectSteal(attr_o); } macro(LOAD_ATTR) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 46fc164a5b3bc2..276c320c5f4d90 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3301,20 +3301,20 @@ case _LOAD_ATTR: { _PyStackRef owner; - _PyStackRef attr; + _PyStackRef *attr; _PyStackRef *self_or_null; oparg = CURRENT_OPARG(); owner = stack_pointer[-1]; + attr = &stack_pointer[-1]; self_or_null = &stack_pointer[0]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); - PyObject *attr_o; if (oparg & 1) { - attr_o = NULL; + *attr = PyStackRef_NULL; _PyFrame_SetStackPointer(frame, stack_pointer); - int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + int is_meth = _PyObject_GetMethodStackRef(tstate, PyStackRef_AsPyObjectBorrow(owner), name, attr); stack_pointer = _PyFrame_GetStackPointer(frame); if (is_meth) { - assert(attr_o != NULL); + assert(!PyStackRef_IsNull(*attr)); self_or_null[0] = owner; } else { @@ -3323,7 +3323,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); - if (attr_o == NULL) { + if (PyStackRef_IsNull(*attr)) { JUMP_TO_ERROR(); } self_or_null[0] = PyStackRef_NULL; @@ -3332,7 +3332,7 @@ } else { _PyFrame_SetStackPointer(frame, stack_pointer); - attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); + PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -3342,10 +3342,9 @@ if (attr_o == NULL) { JUMP_TO_ERROR(); } + *attr = PyStackRef_FromPyObjectSteal(attr_o); stack_pointer += 1; } - attr = PyStackRef_FromPyObjectSteal(attr_o); - stack_pointer[-1] = attr; stack_pointer += (oparg&1); assert(WITHIN_STACK_BOUNDS()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..bb153bc1c0e11a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7941,7 +7941,7 @@ _Py_CODEUNIT* const this_instr = next_instr - 10; (void)this_instr; _PyStackRef owner; - _PyStackRef attr; + _PyStackRef *attr; _PyStackRef *self_or_null; // _SPECIALIZE_LOAD_ATTR { @@ -7964,16 +7964,16 @@ /* Skip 8 cache entries */ // _LOAD_ATTR { + attr = &stack_pointer[-1]; self_or_null = &stack_pointer[0]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1); - PyObject *attr_o; if (oparg & 1) { - attr_o = NULL; + *attr = PyStackRef_NULL; _PyFrame_SetStackPointer(frame, stack_pointer); - int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + int is_meth = _PyObject_GetMethodStackRef(tstate, PyStackRef_AsPyObjectBorrow(owner), name, attr); stack_pointer = _PyFrame_GetStackPointer(frame); if (is_meth) { - assert(attr_o != NULL); + assert(!PyStackRef_IsNull(*attr)); self_or_null[0] = owner; } else { @@ -7982,7 +7982,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); - if (attr_o == NULL) { + if (PyStackRef_IsNull(*attr)) { JUMP_TO_LABEL(error); } self_or_null[0] = PyStackRef_NULL; @@ -7991,7 +7991,7 @@ } else { _PyFrame_SetStackPointer(frame, stack_pointer); - attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); + PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -8001,11 +8001,10 @@ if (attr_o == NULL) { JUMP_TO_LABEL(error); } + *attr = PyStackRef_FromPyObjectSteal(attr_o); stack_pointer += 1; } - attr = PyStackRef_FromPyObjectSteal(attr_o); } - stack_pointer[-1] = attr; stack_pointer += (oparg&1); assert(WITHIN_STACK_BOUNDS()); DISPATCH(); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3182e8b3b70144..aeff76affd8ace 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -590,9 +590,9 @@ dummy_func(void) { } } - op(_LOAD_ATTR, (owner -- attr, self_or_null[oparg&1])) { + op(_LOAD_ATTR, (owner -- attr[1], self_or_null[oparg&1])) { (void)owner; - attr = sym_new_not_null(ctx); + *attr = sym_new_not_null(ctx); if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8d30df3aa7d429..82660d02a4e2c4 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1414,16 +1414,16 @@ case _LOAD_ATTR: { JitOptRef owner; - JitOptRef attr; + JitOptRef *attr; JitOptRef *self_or_null; owner = stack_pointer[-1]; + attr = &stack_pointer[-1]; self_or_null = &stack_pointer[0]; (void)owner; - attr = sym_new_not_null(ctx); + *attr = sym_new_not_null(ctx); if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } - stack_pointer[-1] = attr; stack_pointer += (oparg&1); assert(WITHIN_STACK_BOUNDS()); break; From 86c3316183a79867e3c666d0830f897e16f0f339 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 1 Jul 2025 20:24:04 +0300 Subject: [PATCH 749/989] gh-134280: Disable constant folding for ~ with a boolean argument (GH-134982) This moves the deprecation warning from compile time to run time. --- Lib/test/test_peepholer.py | 1 + .../2025-05-31-19-24-54.gh-issue-134280.NDVbzY.rst | 2 ++ Python/flowgraph.c | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-19-24-54.gh-issue-134280.NDVbzY.rst diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 3d7300e1480256..98629df457460e 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -292,6 +292,7 @@ def test_constant_folding_unaryop(self): ('---x', 'UNARY_NEGATIVE', None, False, None, None), ('~~~x', 'UNARY_INVERT', None, False, None, None), ('+++x', 'CALL_INTRINSIC_1', intrinsic_positive, False, None, None), + ('~True', 'UNARY_INVERT', None, False, None, None), ] for ( diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-19-24-54.gh-issue-134280.NDVbzY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-19-24-54.gh-issue-134280.NDVbzY.rst new file mode 100644 index 00000000000000..f822721690975a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-31-19-24-54.gh-issue-134280.NDVbzY.rst @@ -0,0 +1,2 @@ +Disable constant folding for ``~`` with a boolean argument. +This moves the deprecation warning from compile time to runtime. diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 2adc8c84d83974..1cb6f03169e3b5 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1892,6 +1892,10 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg) result = PyNumber_Negative(operand); break; case UNARY_INVERT: + // XXX: This should be removed once the ~bool depreciation expires. + if (PyBool_Check(operand)) { + return NULL; + } result = PyNumber_Invert(operand); break; case UNARY_NOT: { From f41e9c750e6971c165e055374a1014d6afd2d50e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 1 Jul 2025 13:26:13 -0400 Subject: [PATCH 750/989] gh-134009: Expose `PyMutex_IsLocked` in the public C API (gh-134365) The `PyMutex_IsLocked()` function is useful in assertions for verifying that code maintains certain locking invariants. --- Doc/c-api/init.rst | 12 ++++++++++++ Doc/whatsnew/3.15.rst | 7 +++++++ Include/cpython/lock.h | 11 +++++++++++ Include/internal/pycore_lock.h | 7 ------- .../2025-05-20-17-13-51.gh-issue-134009.CpCmry.rst | 1 + Python/lock.c | 8 ++++++++ 6 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-05-20-17-13-51.gh-issue-134009.CpCmry.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 41fd4ea14ef12f..409539dec17d73 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -2277,6 +2277,18 @@ The C-API provides a basic mutual exclusion lock. .. versionadded:: 3.13 +.. c:function:: int PyMutex_IsLocked(PyMutex *m) + + Returns non-zero if the mutex *m* is currently locked, zero otherwise. + + .. note:: + + This function is intended for use in assertions and debugging only and + should not be used to make concurrency control decisions, as the lock + state may change immediately after the check. + + .. versionadded:: next + .. _python-critical-section-api: Python Critical Section API diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index df636dc9051f0d..f06d4c84ea53d1 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -346,6 +346,13 @@ Porting to Python 3.15 (Contributed by Serhiy Storchaka in :gh:`133595`.) +* Private functions promoted to public C APIs: + + * ``PyMutex_IsLocked()`` : :c:func:`PyMutex_IsLocked` + + The |pythoncapi_compat_project| can be used to get most of these new + functions on Python 3.14 and older. + Deprecated C APIs ----------------- diff --git a/Include/cpython/lock.h b/Include/cpython/lock.h index 8ee03e82f74dfd..63886fca28eae2 100644 --- a/Include/cpython/lock.h +++ b/Include/cpython/lock.h @@ -36,6 +36,9 @@ PyAPI_FUNC(void) PyMutex_Lock(PyMutex *m); // exported function for unlocking the mutex PyAPI_FUNC(void) PyMutex_Unlock(PyMutex *m); +// exported function for checking if the mutex is locked +PyAPI_FUNC(int) PyMutex_IsLocked(PyMutex *m); + // Locks the mutex. // // If the mutex is currently locked, the calling thread will be parked until @@ -61,3 +64,11 @@ _PyMutex_Unlock(PyMutex *m) } } #define PyMutex_Unlock _PyMutex_Unlock + +// Checks if the mutex is currently locked. +static inline int +_PyMutex_IsLocked(PyMutex *m) +{ + return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0; +} +#define PyMutex_IsLocked _PyMutex_IsLocked diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index bd6011b60ac09a..585120108cf342 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -25,13 +25,6 @@ PyMutex_LockFast(PyMutex *m) return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED); } -// Checks if the mutex is currently locked. -static inline int -PyMutex_IsLocked(PyMutex *m) -{ - return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0; -} - // Re-initializes the mutex after a fork to the unlocked state. static inline void _PyMutex_at_fork_reinit(PyMutex *m) diff --git a/Misc/NEWS.d/next/C_API/2025-05-20-17-13-51.gh-issue-134009.CpCmry.rst b/Misc/NEWS.d/next/C_API/2025-05-20-17-13-51.gh-issue-134009.CpCmry.rst new file mode 100644 index 00000000000000..f060f09de19bb8 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-20-17-13-51.gh-issue-134009.CpCmry.rst @@ -0,0 +1 @@ +Expose :c:func:`PyMutex_IsLocked` as part of the public C API. diff --git a/Python/lock.c b/Python/lock.c index eb09019e0a236d..a49d587a1686d9 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -634,3 +634,11 @@ PyMutex_Unlock(PyMutex *m) Py_FatalError("unlocking mutex that is not locked"); } } + + +#undef PyMutex_IsLocked +int +PyMutex_IsLocked(PyMutex *m) +{ + return _PyMutex_IsLocked(m); +} From b3308973e382d1a3c1627903779fcb89a40d7645 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 2 Jul 2025 14:08:25 +0800 Subject: [PATCH 751/989] gh-136183: Deal with escapes in JIT optimizer's constant evaluator (GH-136184) --- .github/workflows/jit.yml | 4 ++ Lib/test/test_capi/test_opt.py | 15 +++++++ Lib/test/test_generated_cases.py | 47 ++++++++++++++++++++ Python/optimizer_cases.c.h | 6 +-- Tools/cases_generator/generators_common.py | 11 +++-- Tools/cases_generator/optimizer_generator.py | 1 + 6 files changed, 77 insertions(+), 7 deletions(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 116e0c1e945e38..947badff816c09 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -5,6 +5,8 @@ on: - '**jit**' - 'Python/bytecodes.c' - 'Python/optimizer*.c' + - 'Python/executor_cases.c.h' + - 'Python/optimizer_cases.c.h' - '!Python/perf_jit_trampoline.c' - '!**/*.md' - '!**/*.ini' @@ -13,6 +15,8 @@ on: - '**jit**' - 'Python/bytecodes.c' - 'Python/optimizer*.c' + - 'Python/executor_cases.c.h' + - 'Python/optimizer_cases.c.h' - '!Python/perf_jit_trampoline.c' - '!**/*.md' - '!**/*.ini' diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e4c9a463855a69..7be1c9eebb3bf9 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2451,6 +2451,21 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_FLOAT", uops) self.assertNotIn("_GUARD_NOS_FLOAT", uops) + def test_binary_op_constant_evaluate(self): + def testfunc(n): + for _ in range(n): + 2 ** 65 + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # For now... until we constant propagate it away. + self.assertIn("_BINARY_OP", uops) + + def global_identity(x): return x diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index eb01328b6ea946..81d4e39f5be1ee 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2398,6 +2398,53 @@ def test_replace_opcode_uop_body_copied_in_complex(self): """ self.run_cases_test(input, input2, output) + def test_replace_opcode_escaping_uop_body_copied_in_complex(self): + input = """ + pure op(OP, (foo -- res)) { + if (foo) { + res = ESCAPING_CODE(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (foo) { + res_stackref = ESCAPING_CODE(foo); + } + else { + res_stackref = 1; + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + def test_replace_opocode_uop_reject_array_effects(self): input = """ pure op(OP, (foo[2] -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 82660d02a4e2c4..41402200c1683e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2723,9 +2723,6 @@ PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); assert(_PyEval_BinaryOps[oparg]); - stack_pointer[-2] = res; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o); if (res_o == NULL) { JUMP_TO_LABEL(error); @@ -2733,6 +2730,9 @@ res_stackref = PyStackRef_FromPyObjectSteal(res_o); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } bool lhs_int = sym_matches_type(lhs, &PyLong_Type); diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 47de205c0e9120..4c210fbf8d28e9 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -106,8 +106,9 @@ class Emitter: out: CWriter labels: dict[str, Label] _replacers: dict[str, ReplacementFunctionType] + cannot_escape: bool - def __init__(self, out: CWriter, labels: dict[str, Label]): + def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False): self._replacers = { "EXIT_IF": self.exit_if, "DEOPT_IF": self.deopt_if, @@ -127,6 +128,7 @@ def __init__(self, out: CWriter, labels: dict[str, Label]): } self.out = out self.labels = labels + self.cannot_escape = cannot_escape def dispatch( self, @@ -238,7 +240,8 @@ def decref_inputs( next(tkn_iter) self._print_storage("DECREF_INPUTS", storage) try: - storage.close_inputs(self.out) + if not self.cannot_escape: + storage.close_inputs(self.out) except StackError as ex: raise analysis_error(ex.args[0], tkn) except Exception as ex: @@ -476,7 +479,7 @@ def emit_SimpleStmt( reachable = True tkn = stmt.contents[-1] try: - if stmt in uop.properties.escaping_calls: + if stmt in uop.properties.escaping_calls and not self.cannot_escape: escape = uop.properties.escaping_calls[stmt] if escape.kills is not None: self.stackref_kill(escape.kills, storage, True) @@ -513,7 +516,7 @@ def emit_SimpleStmt( self.out.emit(tkn) else: self.out.emit(tkn) - if stmt in uop.properties.escaping_calls: + if stmt in uop.properties.escaping_calls and not self.cannot_escape: self.emit_reload(storage) return reachable, None, storage except StackError as ex: diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 4556b6d5a74f37..81ae534bddae5c 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -245,6 +245,7 @@ def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, st outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs } self._replacers = {**self._replacers, **overrides} + self.cannot_escape = True def emit_to_with_replacement( self, From b19c9da401b9e81078103861f55e0762b93453f0 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Wed, 2 Jul 2025 12:20:00 +0300 Subject: [PATCH 752/989] gh-115119: Defer removal of bundled libmpdec to 3.16 (#133997) Rename libmpdecimal -> libmpdec see https://www.bytereef.org/mpdecimal/doc/libmpdec/index.html --- Doc/deprecations/c-api-pending-removal-in-3.15.rst | 1 - Doc/deprecations/c-api-pending-removal-in-3.16.rst | 4 ++++ Doc/whatsnew/3.12.rst | 2 ++ Doc/whatsnew/3.13.rst | 4 +++- Doc/whatsnew/3.14.rst | 2 ++ configure | 8 ++++---- configure.ac | 4 ++-- 7 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 Doc/deprecations/c-api-pending-removal-in-3.16.rst diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index b87f0a5ecde06f..a3e335ecaf4324 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -1,7 +1,6 @@ Pending removal in Python 3.15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* The bundled copy of ``libmpdecimal``. * The :c:func:`!PyImport_ImportModuleNoBlock`: Use :c:func:`PyImport_ImportModule` instead. * :c:func:`PyWeakref_GetObject` and :c:func:`PyWeakref_GET_OBJECT`: diff --git a/Doc/deprecations/c-api-pending-removal-in-3.16.rst b/Doc/deprecations/c-api-pending-removal-in-3.16.rst new file mode 100644 index 00000000000000..9453f83799c43d --- /dev/null +++ b/Doc/deprecations/c-api-pending-removal-in-3.16.rst @@ -0,0 +1,4 @@ +Pending removal in Python 3.16 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* The bundled copy of ``libmpdec``. diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index a65f59c0a72315..7cfdc287b7fad7 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -2233,6 +2233,8 @@ Deprecated .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-future.rst Removed diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index ef7c36d8539dfd..0a3b3b30e016da 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -2546,6 +2546,8 @@ Deprecated C APIs .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-3.18.rst .. include:: ../deprecations/c-api-pending-removal-in-future.rst @@ -2592,7 +2594,7 @@ Build Changes * The :file:`configure` option :option:`--with-system-libmpdec` now defaults to ``yes``. - The bundled copy of ``libmpdecimal`` will be removed in Python 3.15. + The bundled copy of ``libmpdec`` will be removed in Python 3.16. * Python built with :file:`configure` :option:`--with-trace-refs` (tracing references) is now ABI compatible with the Python release build diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8260f8ddc4e08c..c108a94692dca7 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -3053,6 +3053,8 @@ Deprecated .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-3.18.rst .. include:: ../deprecations/c-api-pending-removal-in-future.rst diff --git a/configure b/configure index 3a103c39b62076..9df366697b8546 100755 --- a/configure +++ b/configure @@ -33180,13 +33180,13 @@ printf "%s\n" "$py_cv_module__decimal" >&6; } if test "x$with_system_libmpdec" = xno then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&5 -printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&5 +printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&2;} fi if test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no" then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdecimal found; falling back to pure-Python version for the decimal module" >&5 -printf "%s\n" "$as_me: WARNING: no system libmpdecimal found; falling back to pure-Python version for the decimal module" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdec found; falling back to pure-Python version for the decimal module" >&5 +printf "%s\n" "$as_me: WARNING: no system libmpdec found; falling back to pure-Python version for the decimal module" >&2;} fi diff --git a/configure.ac b/configure.ac index 761d1dbd45cd8e..cb7f2144345b37 100644 --- a/configure.ac +++ b/configure.ac @@ -8129,11 +8129,11 @@ PY_STDLIB_MOD([_decimal], AS_VAR_IF([with_system_libmpdec], [no], [AC_MSG_WARN([m4_normalize([ - the bundled copy of libmpdecimal is scheduled for removal in Python 3.16; + the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library.])])]) AS_IF([test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no"], [AC_MSG_WARN([m4_normalize([ - no system libmpdecimal found; falling back to pure-Python version + no system libmpdec found; falling back to pure-Python version for the decimal module])])]) PY_STDLIB_MOD([_dbm], From 51ab66b3d519d3802430b3a31a135d2670e37408 Mon Sep 17 00:00:00 2001 From: Garry Cairns <2401853+garry-cairns@users.noreply.github.com> Date: Wed, 2 Jul 2025 10:51:19 +0100 Subject: [PATCH 753/989] gh-134567: Add the formatter parameter in unittest.TestCase.assertLogs (GH-134570) --- Doc/library/unittest.rst | 9 ++++++++- Doc/whatsnew/3.15.rst | 9 +++++++++ Lib/test/test_unittest/test_case.py | 16 ++++++++++++++++ Lib/unittest/_log.py | 5 +++-- Lib/unittest/case.py | 6 ++++-- ...025-05-23-09-19-52.gh-issue-134567.hwEIMb.rst | 2 ++ 6 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-05-23-09-19-52.gh-issue-134567.hwEIMb.rst diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index dcdda1719bf593..d526e835caa18c 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -1131,7 +1131,7 @@ Test cases .. versionchanged:: 3.3 Added the *msg* keyword argument when used as a context manager. - .. method:: assertLogs(logger=None, level=None) + .. method:: assertLogs(logger=None, level=None, formatter=None) A context manager to test that at least one message is logged on the *logger* or one of its children, with at least the given @@ -1146,6 +1146,10 @@ Test cases its string equivalent (for example either ``"ERROR"`` or :const:`logging.ERROR`). The default is :const:`logging.INFO`. + If given, *formatter* should be a :class:`logging.Formatter` object. + The default is a formatter with format string + ``"%(levelname)s:%(name)s:%(message)s"`` + The test passes if at least one message emitted inside the ``with`` block matches the *logger* and *level* conditions, otherwise it fails. @@ -1173,6 +1177,9 @@ Test cases .. versionadded:: 3.4 + .. versionchanged:: next + Now accepts a *formatter* to control how messages are formatted. + .. method:: assertNoLogs(logger=None, level=None) A context manager to test that no messages are logged on diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index f06d4c84ea53d1..706a816f888b30 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -291,6 +291,15 @@ typing (Contributed by Bénédikt Tran in :gh:`133823`.) +unittest +-------- + +* Lets users specify formatter in TestCase.assertLogs. + :func:`unittest.TestCase.assertLogs` will now accept a formatter + to control how messages are formatted. + (Contributed by Garry Cairns in :gh:`134567`.) + + wave ---- diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index d66cab146af246..cf10e956bf2bdc 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -1920,6 +1920,22 @@ def testAssertLogsUnexpectedException(self): with self.assertLogs(): raise ZeroDivisionError("Unexpected") + def testAssertLogsWithFormatter(self): + # Check alternative formats will be respected + format = "[No.1: the larch] %(levelname)s:%(name)s:%(message)s" + formatter = logging.Formatter(format) + with self.assertNoStderr(): + with self.assertLogs() as cm: + log_foo.info("1") + log_foobar.debug("2") + self.assertEqual(cm.output, ["INFO:foo:1"]) + self.assertLogRecords(cm.records, [{'name': 'foo'}]) + with self.assertLogs(formatter=formatter) as cm: + log_foo.info("1") + log_foobar.debug("2") + self.assertEqual(cm.output, ["[No.1: the larch] INFO:foo:1"]) + self.assertLogRecords(cm.records, [{'name': 'foo'}]) + def testAssertNoLogsDefault(self): with self.assertRaises(self.failureException) as cm: with self.assertNoLogs(): diff --git a/Lib/unittest/_log.py b/Lib/unittest/_log.py index 94868e5bb95eb3..3d69385ea243e7 100644 --- a/Lib/unittest/_log.py +++ b/Lib/unittest/_log.py @@ -30,7 +30,7 @@ class _AssertLogsContext(_BaseTestCaseContext): LOGGING_FORMAT = "%(levelname)s:%(name)s:%(message)s" - def __init__(self, test_case, logger_name, level, no_logs): + def __init__(self, test_case, logger_name, level, no_logs, formatter=None): _BaseTestCaseContext.__init__(self, test_case) self.logger_name = logger_name if level: @@ -39,13 +39,14 @@ def __init__(self, test_case, logger_name, level, no_logs): self.level = logging.INFO self.msg = None self.no_logs = no_logs + self.formatter = formatter def __enter__(self): if isinstance(self.logger_name, logging.Logger): logger = self.logger = self.logger_name else: logger = self.logger = logging.getLogger(self.logger_name) - formatter = logging.Formatter(self.LOGGING_FORMAT) + formatter = self.formatter or logging.Formatter(self.LOGGING_FORMAT) handler = _CapturingHandler() handler.setLevel(self.level) handler.setFormatter(formatter) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index db10de68e4ac73..eba50839cd33ae 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -849,7 +849,7 @@ def _assertNotWarns(self, expected_warning, *args, **kwargs): context = _AssertNotWarnsContext(expected_warning, self) return context.handle('_assertNotWarns', args, kwargs) - def assertLogs(self, logger=None, level=None): + def assertLogs(self, logger=None, level=None, formatter=None): """Fail unless a log message of level *level* or higher is emitted on *logger_name* or its children. If omitted, *level* defaults to INFO and *logger* defaults to the root logger. @@ -861,6 +861,8 @@ def assertLogs(self, logger=None, level=None): `records` attribute will be a list of the corresponding LogRecord objects. + Optionally supply `formatter` to control how messages are formatted. + Example:: with self.assertLogs('foo', level='INFO') as cm: @@ -871,7 +873,7 @@ def assertLogs(self, logger=None, level=None): """ # Lazy import to avoid importing logging if it is not needed. from ._log import _AssertLogsContext - return _AssertLogsContext(self, logger, level, no_logs=False) + return _AssertLogsContext(self, logger, level, no_logs=False, formatter=formatter) def assertNoLogs(self, logger=None, level=None): """ Fail unless no log messages of level *level* or higher are emitted diff --git a/Misc/NEWS.d/next/Tests/2025-05-23-09-19-52.gh-issue-134567.hwEIMb.rst b/Misc/NEWS.d/next/Tests/2025-05-23-09-19-52.gh-issue-134567.hwEIMb.rst new file mode 100644 index 00000000000000..42e4a01c0ccb4b --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-05-23-09-19-52.gh-issue-134567.hwEIMb.rst @@ -0,0 +1,2 @@ +Expose log formatter to users in TestCase.assertLogs. +:func:`unittest.TestCase.assertLogs` will now optionally accept a formatter that will be used to format the strings in output if provided. From ab7196a2f535f181abd50ccbf1b3b224bbd5eee0 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 2 Jul 2025 14:32:41 +0300 Subject: [PATCH 754/989] gh-136193: Improve `TypeError` msg when comparing two `SimpleNamespace`s (#136195) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_types.py | 19 +++++++++++++++++++ ...-07-02-10-48-21.gh-issue-136193.xfvras.rst | 2 ++ Objects/namespaceobject.c | 8 ++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-02-10-48-21.gh-issue-136193.xfvras.rst diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index fc26e71ffcb67b..116f55051ddb5a 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -21,6 +21,7 @@ import unittest.mock import weakref import typing +import re c_types = import_fresh_module('types', fresh=['_types']) py_types = import_fresh_module('types', blocked=['_types']) @@ -2009,6 +2010,24 @@ def test_equal(self): self.assertEqual(ns1, ns2) self.assertNotEqual(ns2, types.SimpleNamespace()) + def test_richcompare_unsupported(self): + ns1 = types.SimpleNamespace(x=1) + ns2 = types.SimpleNamespace(y=2) + + msg = re.escape( + "not supported between instances of " + "'types.SimpleNamespace' and 'types.SimpleNamespace'" + ) + + with self.assertRaisesRegex(TypeError, msg): + ns1 > ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 >= ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 < ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 <= ns2 + def test_nested(self): ns1 = types.SimpleNamespace(a=1, b=2) ns2 = types.SimpleNamespace() diff --git a/Misc/NEWS.d/next/Library/2025-07-02-10-48-21.gh-issue-136193.xfvras.rst b/Misc/NEWS.d/next/Library/2025-07-02-10-48-21.gh-issue-136193.xfvras.rst new file mode 100644 index 00000000000000..801115202d0c95 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-02-10-48-21.gh-issue-136193.xfvras.rst @@ -0,0 +1,2 @@ +Improve :exc:`TypeError` error message, when richcomparing two +:class:`types.SimpleNamespace` objects. diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index 0fc2bcea4cb06e..201cb8a7df8da1 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -194,10 +194,14 @@ namespace_clear(PyObject *op) static PyObject * namespace_richcompare(PyObject *self, PyObject *other, int op) { - if (PyObject_TypeCheck(self, &_PyNamespace_Type) && - PyObject_TypeCheck(other, &_PyNamespace_Type)) + if ( + (op == Py_EQ || op == Py_NE) && + PyObject_TypeCheck(self, &_PyNamespace_Type) && + PyObject_TypeCheck(other, &_PyNamespace_Type) + ) { return PyObject_RichCompare(((_PyNamespaceObject *)self)->ns_dict, ((_PyNamespaceObject *)other)->ns_dict, op); + } Py_RETURN_NOTIMPLEMENTED; } From fa43a1e0f8caf00a15898fa719e31767c866bd90 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 2 Jul 2025 15:40:41 +0200 Subject: [PATCH 755/989] gh-127705: Move Py_INCREF_MORTAL() to the internal C API (GH-136178) Rename Py_INCREF_MORTAL() to _Py_INCREF_MORTAL() and move it to pycore_object.h internal header. --- Include/internal/pycore_object.h | 14 ++++++++++++++ Include/internal/pycore_stackref.h | 8 ++++---- Include/refcount.h | 14 -------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index d0fe1f63250eaa..8fe9875fae0687 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -1033,6 +1033,20 @@ enum _PyAnnotateFormat { int _PyObject_SetDict(PyObject *obj, PyObject *value); +#ifndef Py_GIL_DISABLED +static inline Py_ALWAYS_INLINE void _Py_INCREF_MORTAL(PyObject *op) +{ + assert(!_Py_IsStaticImmortal(op)); + op->ob_refcnt++; + _Py_INCREF_STAT_INC(); +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) + if (!_Py_IsImmortal(op)) { + _Py_INCREF_IncRefTotal(); + } +#endif +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 10e7199269eb40..48a40a4c3479c7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -626,7 +626,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) if (_Py_IsImmortal(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -637,7 +637,7 @@ static inline _PyStackRef _PyStackRef_FromPyObjectNewMortal(PyObject *obj) { assert(obj != NULL); - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -654,14 +654,14 @@ PyStackRef_FromPyObjectBorrow(PyObject *obj) /* WARNING: This macro evaluates its argument more than once */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_RefcountOnObject(REF) ? (Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) + (PyStackRef_RefcountOnObject(REF) ? (_Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); if (PyStackRef_RefcountOnObject(ref)) { - Py_INCREF_MORTAL(BITS_TO_PTR(ref)); + _Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; } diff --git a/Include/refcount.h b/Include/refcount.h index 65a4e63a8b0952..457972b6dcfd9f 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -244,20 +244,6 @@ PyAPI_FUNC(void) Py_DecRef(PyObject *); PyAPI_FUNC(void) _Py_IncRef(PyObject *); PyAPI_FUNC(void) _Py_DecRef(PyObject *); -#ifndef Py_GIL_DISABLED -static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) -{ - assert(!_Py_IsStaticImmortal(op)); - op->ob_refcnt++; - _Py_INCREF_STAT_INC(); -#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) - if (!_Py_IsImmortal(op)) { - _Py_INCREF_IncRefTotal(); - } -#endif -} -#endif - static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) { #if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) From 41a9b46ad4787b58b51227929fd1876286448523 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 2 Jul 2025 19:05:28 +0300 Subject: [PATCH 756/989] gh-136203: Improve `TypeError` msg when comparing two `MappingProxyType`s (#136204) Co-authored-by: Victor Stinner --- Lib/test/test_types.py | 21 +++++++++++++++++++ ...-07-02-15-18-41.gh-issue-136203.Y934sC.rst | 2 ++ Objects/descrobject.c | 5 ++++- 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-02-15-18-41.gh-issue-136203.Y934sC.rst diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 116f55051ddb5a..fccdcc975e6c43 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -1376,6 +1376,27 @@ def __hash__(self): view = self.mappingproxy(mapping) self.assertEqual(hash(view), hash(mapping)) + def test_richcompare(self): + mp1 = self.mappingproxy({'a': 1}) + mp1_2 = self.mappingproxy({'a': 1}) + mp2 = self.mappingproxy({'a': 2}) + + self.assertTrue(mp1 == mp1_2) + self.assertFalse(mp1 != mp1_2) + self.assertFalse(mp1 == mp2) + self.assertTrue(mp1 != mp2) + + msg = "not supported between instances of 'mappingproxy' and 'mappingproxy'" + + with self.assertRaisesRegex(TypeError, msg): + mp1 > mp2 + with self.assertRaisesRegex(TypeError, msg): + mp1 < mp1_2 + with self.assertRaisesRegex(TypeError, msg): + mp2 >= mp2 + with self.assertRaisesRegex(TypeError, msg): + mp1_2 <= mp1 + class ClassCreationTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-02-15-18-41.gh-issue-136203.Y934sC.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-02-15-18-41.gh-issue-136203.Y934sC.rst new file mode 100644 index 00000000000000..5a622bab8b893d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-02-15-18-41.gh-issue-136203.Y934sC.rst @@ -0,0 +1,2 @@ +Improve :exc:`TypeError` error message, when richcomparing two +:class:`types.MappingProxyType` objects. diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 10c465b95ac192..d3d17e92b6d1e8 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1233,7 +1233,10 @@ static PyObject * mappingproxy_richcompare(PyObject *self, PyObject *w, int op) { mappingproxyobject *v = (mappingproxyobject *)self; - return PyObject_RichCompare(v->mapping, w, op); + if (op == Py_EQ || op == Py_NE) { + return PyObject_RichCompare(v->mapping, w, op); + } + Py_RETURN_NOTIMPLEMENTED; } static int From 7afe1adb0089d0f2df2add79bbc2300c2f236d90 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 2 Jul 2025 20:43:43 +0300 Subject: [PATCH 757/989] Replace `capi-sig` mailing list with `discuss.python.org` (#136211) --- Doc/howto/isolating-extensions.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index fbc426ba1d7d9a..7da6dc8a39795e 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -626,8 +626,7 @@ Open Issues Several issues around per-module state and heap types are still open. -Discussions about improving the situation are best held on the `capi-sig -mailing list `__. +Discussions about improving the situation are best held on the `discuss forum under c-api tag `__. Per-Class Scope From 135ba86212ad116af3cc4a6ba656bc8cfaab131a Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Thu, 3 Jul 2025 08:51:31 +0800 Subject: [PATCH 758/989] gh-136135: Doc: Fix some broken links (GH-136137) --- Doc/howto/functional.rst | 2 +- Doc/installing/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 78e56e0c64fb10..053558e389030a 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -1217,7 +1217,7 @@ flow inside a program. The book uses Scheme for its examples, but many of the design approaches described in these chapters are applicable to functional-style Python code. -https://www.defmacro.org/ramblings/fp.html: A general introduction to functional +https://defmacro.org/2006/06/19/fp.html: A general introduction to functional programming that uses Java examples and has a lengthy historical introduction. https://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry diff --git a/Doc/installing/index.rst b/Doc/installing/index.rst index a46c1caefe4d8a..3a485a43a5a751 100644 --- a/Doc/installing/index.rst +++ b/Doc/installing/index.rst @@ -188,7 +188,7 @@ switch:: Once the Development & Deployment part of PPUG is fleshed out, some of those sections should be linked from new questions here (most notably, we should have a question about avoiding depending on PyPI that links to - https://packaging.python.org/en/latest/mirrors/) + https://packaging.python.org/en/latest/guides/index-mirrors-and-caches/) Common installation issues From 9084b151567d02936ea1374961809b69b4cd883d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Jul 2025 07:08:39 +0300 Subject: [PATCH 759/989] gh-135836: Fix `IndexError` in `asyncio.create_connection()` (#135875) --- Lib/asyncio/base_events.py | 65 ++++++++++--------- Lib/test/test_asyncio/test_base_events.py | 29 +++++++++ ...-06-24-10-52-35.gh-issue-135836.s37351.rst | 3 + 3 files changed, 67 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-24-10-52-35.gh-issue-135836.s37351.rst diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 2ff9e4017bb245..520d4b398545bf 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -1016,38 +1016,43 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: {str(exc).lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index bb9f366fc411aa..12179eb0c9e274 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -24,6 +24,10 @@ MOCK_ANY = mock.ANY +class CustomError(Exception): + pass + + def tearDownModule(): asyncio._set_event_loop_policy(None) @@ -1326,6 +1330,31 @@ def getaddrinfo_task(*args, **kwds): self.assertEqual(len(cm.exception.exceptions), 1) self.assertIsInstance(cm.exception.exceptions[0], OSError) + @patch_socket + def test_create_connection_connect_non_os_err_close_err(self, m_socket): + # Test the case when sock_connect() raises non-OSError exception + # and sock.close() raises OSError. + async def getaddrinfo(*args, **kw): + return [(2, 1, 6, '', ('107.6.106.82', 80))] + + def getaddrinfo_task(*args, **kwds): + return self.loop.create_task(getaddrinfo(*args, **kwds)) + + self.loop.getaddrinfo = getaddrinfo_task + self.loop.sock_connect = mock.Mock() + self.loop.sock_connect.side_effect = CustomError + sock = mock.Mock() + m_socket.socket.return_value = sock + sock.close.side_effect = OSError + + coro = self.loop.create_connection(MyProto, 'example.com', 80) + self.assertRaises( + CustomError, self.loop.run_until_complete, coro) + + coro = self.loop.create_connection(MyProto, 'example.com', 80, all_errors=True) + self.assertRaises( + CustomError, self.loop.run_until_complete, coro) + def test_create_connection_multiple(self): async def getaddrinfo(*args, **kw): return [(2, 1, 6, '', ('0.0.0.1', 80)), diff --git a/Misc/NEWS.d/next/Library/2025-06-24-10-52-35.gh-issue-135836.s37351.rst b/Misc/NEWS.d/next/Library/2025-06-24-10-52-35.gh-issue-135836.s37351.rst new file mode 100644 index 00000000000000..1d1e7a2298c085 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-24-10-52-35.gh-issue-135836.s37351.rst @@ -0,0 +1,3 @@ +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could +occur when non-\ :exc:`OSError` exception is raised during connection and +socket's ``close()`` raises :exc:`!OSError`. From e0245c789f54b63d461717a91eec8ffccbe18966 Mon Sep 17 00:00:00 2001 From: Kira Date: Thu, 3 Jul 2025 03:48:47 -0400 Subject: [PATCH 760/989] gh-135640: Adds more type checking to ElementTree (GH-135643) --- Lib/test/test_xml_etree.py | 27 +++++++++++++++++++ Lib/xml/etree/ElementTree.py | 10 +++++-- ...-06-22-02-16-17.gh-issue-135640.FXyFL6.rst | 4 +++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-22-02-16-17.gh-issue-135640.FXyFL6.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 38be2cd437f200..bf6d5074fdebd8 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -218,6 +218,33 @@ class ElementTreeTest(unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) + def test_constructor(self): + # Test constructor behavior. + + with self.assertRaises(TypeError): + tree = ET.ElementTree("") + with self.assertRaises(TypeError): + tree = ET.ElementTree(ET.ElementTree()) + + def test_setroot(self): + # Test _setroot behavior. + + tree = ET.ElementTree() + element = ET.Element("tag") + tree._setroot(element) + self.assertEqual(tree.getroot().tag, "tag") + self.assertEqual(tree.getroot(), element) + + # Test behavior with an invalid root element + + tree = ET.ElementTree() + with self.assertRaises(TypeError): + tree._setroot("") + with self.assertRaises(TypeError): + tree._setroot(ET.ElementTree()) + with self.assertRaises(TypeError): + tree._setroot(None) + def test_interface(self): # Test element tree interface. diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 44ab5d18624e73..dafe5b1b8a0c3f 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -527,7 +527,9 @@ class ElementTree: """ def __init__(self, element=None, file=None): - # assert element is None or iselement(element) + if element is not None and not iselement(element): + raise TypeError('expected an Element, not %s' % + type(element).__name__) self._root = element # first node if file: self.parse(file) @@ -543,7 +545,9 @@ def _setroot(self, element): with the given element. Use with care! """ - # assert iselement(element) + if not iselement(element): + raise TypeError('expected an Element, not %s' + % type(element).__name__) self._root = element def parse(self, source, parser=None): @@ -709,6 +713,8 @@ def write(self, file_or_filename, of start/end tags """ + if self._root is None: + raise TypeError('ElementTree not initialized') if not method: method = "xml" elif method not in _serialize: diff --git a/Misc/NEWS.d/next/Library/2025-06-22-02-16-17.gh-issue-135640.FXyFL6.rst b/Misc/NEWS.d/next/Library/2025-06-22-02-16-17.gh-issue-135640.FXyFL6.rst new file mode 100644 index 00000000000000..ad217b57b4b510 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-02-16-17.gh-issue-135640.FXyFL6.rst @@ -0,0 +1,4 @@ +Address bug where it was possible to call +:func:`xml.etree.ElementTree.ElementTree.write` on an ElementTree object with +an invalid root element. This behavior blanked the file passed to ``write`` +if it already existed. From 8f8bdf251a5f79d15ac2b1a6d19860033bf50c79 Mon Sep 17 00:00:00 2001 From: Alper Date: Thu, 3 Jul 2025 02:51:41 -0700 Subject: [PATCH 761/989] Fix comments for `heapq.siftup_max` (#135359) Co-authored-by: mpage --- Modules/_heapqmodule.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 560fe431fcac99..05d01acd77109b 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -463,11 +463,11 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } - /* Bubble up the smaller child until hitting a leaf. */ + /* Bubble up the larger child until hitting a leaf. */ arr = _PyList_ITEMS(heap); limit = endpos >> 1; /* smallest pos that has no child */ while (pos < limit) { - /* Set childpos to index of smaller child. */ + /* Set childpos to index of larger child. */ childpos = 2*pos + 1; /* leftmost child position */ if (childpos + 1 < endpos) { PyObject* a = arr[childpos + 1]; @@ -487,7 +487,7 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } } - /* Move the smaller child up. */ + /* Move the larger child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); From 8dc3383abea72ee3deafec60818aeb817d8fec09 Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Thu, 3 Jul 2025 10:53:37 +0100 Subject: [PATCH 762/989] gh-135069: Fix exception message in encodings.idna module (#135071) --- Lib/encodings/idna.py | 2 +- .../Library/2025-06-03-12-59-17.gh-issue-135069.xop30V.rst | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-03-12-59-17.gh-issue-135069.xop30V.rst diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 60a8d5eb227f82..0c90b4c9fe18fa 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -316,7 +316,7 @@ def _buffer_encode(self, input, errors, final): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling: {errors}") + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) diff --git a/Misc/NEWS.d/next/Library/2025-06-03-12-59-17.gh-issue-135069.xop30V.rst b/Misc/NEWS.d/next/Library/2025-06-03-12-59-17.gh-issue-135069.xop30V.rst new file mode 100644 index 00000000000000..1affb5e2aadab7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-03-12-59-17.gh-issue-135069.xop30V.rst @@ -0,0 +1,3 @@ +Fix the "Invalid error handling" exception in +:class:`!encodings.idna.IncrementalDecoder` to correctly replace the +'errors' parameter. From 5c984ae35e30f0533e6cc727d23cc158decf001c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Jul 2025 13:39:14 +0300 Subject: [PATCH 763/989] gh-133740: Fix regression in locale.nl_langinfo(ALT_DIGITS) (GH-136237) There is no need to temporary switch locale for items ALT_DIGITS and ERA if the nl_langinfo() result is empty (most locales). --- Modules/_localemodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index c1f56008b7c49e..41e6d48b1dbd9b 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -692,7 +692,7 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; char *oldloc = NULL; if (langinfo_constants[i].category != LC_CTYPE - && ( + && *result && ( #ifdef __GLIBC__ // gh-133740: Always change the locale for ALT_DIGITS and ERA # ifdef ALT_DIGITS From c113a8e5236b31217d82ce289d3df6ec9e8411cd Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 3 Jul 2025 13:57:31 +0300 Subject: [PATCH 764/989] gh-130664: Treat '0' fill character with align '=' as zero-padding for Fraction's (GH-131067) --- Lib/fractions.py | 3 +++ Lib/test/test_fractions.py | 7 ++----- .../Library/2025-03-11-05-24-14.gh-issue-130664.g0yNMm.rst | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-03-11-05-24-14.gh-issue-130664.g0yNMm.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index cb05ae7c2003f0..a8c670685228d3 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -504,6 +504,9 @@ def _format_float_style(self, match): trim_point = not alternate_form exponent_indicator = "E" if presentation_type in "EFG" else "e" + if align == '=' and fill == '0': + zeropad = True + # Round to get the digits we need, figure out where to place the point, # and decide whether to use scientific notation. 'point_pos' is the # relative to the _end_ of the digit string: that is, it's the number diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index d1d2739856c092..1875a2f529c957 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1480,11 +1480,8 @@ def test_format_f_presentation_type(self): (F('-1234.5678'), '08,.0f', '-001,235'), (F('-1234.5678'), '09,.0f', '-0,001,235'), # Corner-case - zero-padding specified through fill and align - # instead of the zero-pad character - in this case, treat '0' as a - # regular fill character and don't attempt to insert commas into - # the filled portion. This differs from the int and float - # behaviour. - (F('1234.5678'), '0=12,.2f', '00001,234.57'), + # instead of the zero-pad character. + (F('1234.5678'), '0=12,.2f', '0,001,234.57'), # Corner case where it's not clear whether the '0' indicates zero # padding or gives the minimum width, but there's still an obvious # answer to give. We want this to work in case the minimum width diff --git a/Misc/NEWS.d/next/Library/2025-03-11-05-24-14.gh-issue-130664.g0yNMm.rst b/Misc/NEWS.d/next/Library/2025-03-11-05-24-14.gh-issue-130664.g0yNMm.rst new file mode 100644 index 00000000000000..dbe783a2a99e0a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-11-05-24-14.gh-issue-130664.g0yNMm.rst @@ -0,0 +1,4 @@ +Handle corner-case for :class:`~fractions.Fraction`'s formatting: treat +zero-padding (preceding the width field by a zero (``'0'``) character) as an +equivalent to a fill character of ``'0'`` with an alignment type of ``'='``, +just as in case of :class:`float`'s. From a525ba514a5ab9df8add156f08b4c137da4783fd Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 3 Jul 2025 15:14:14 +0300 Subject: [PATCH 765/989] `Python/gc.c`: Refer to `InternalDocs` instead of devguide. (#136243) --- Python/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 7b0e6d6e803504..02135a3fb442d6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1,6 +1,6 @@ // This implements the reference cycle garbage collector. // The Python module interface to the collector is in gcmodule.c. -// See https://devguide.python.org/internals/garbage-collector/ +// See InternalDocs/garbage_collector.md for more infromation. #include "Python.h" #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() From b2e498ac26ba4720fa8a6a466aa8416ab69754d7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 3 Jul 2025 15:44:59 +0300 Subject: [PATCH 766/989] gh-115119: Recommend upstream libmpdec in build requirements (#136205) Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Victor Stinner --- Doc/using/configure.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index df81a330549147..a26d48d7d0a345 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -29,6 +29,9 @@ Features and minimum versions required to build CPython: * Tcl/Tk 8.5.12 for the :mod:`tkinter` module. +* `libmpdec `_ 2.5.0 + for the :mod:`decimal` module. + * Autoconf 2.72 and aclocal 1.16.5 are required to regenerate the :file:`configure` script. From da79ac9d26860db62500762c95b7ae534638f9a7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 3 Jul 2025 14:51:44 +0200 Subject: [PATCH 767/989] gh-135075: Make PyObject_SetAttr() fail with NULL value and exception (#136180) Make PyObject_SetAttr() and PyObject_SetAttrString() fail if called with NULL value and an exception set. --- Doc/c-api/object.rst | 15 ++++++++ Lib/test/test_capi/test_abstract.py | 25 ++++++++++++ ...-07-01-16-22-39.gh-issue-135075.angu3J.rst | 2 + Modules/_testcapi/abstract.c | 38 +++++++++++++++++++ Objects/object.c | 38 ++++++++++++++----- 5 files changed, 109 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-07-01-16-22-39.gh-issue-135075.angu3J.rst diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 0fd159f1eb87f8..21fa1491b33d86 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -197,6 +197,13 @@ Object Protocol in favour of using :c:func:`PyObject_DelAttr`, but there are currently no plans to remove it. + The function must not be called with ``NULL`` *v* and an an exception set. + This case can arise from forgetting ``NULL`` checks and would delete the + attribute. + + .. versionchanged:: next + Must not be called with NULL value if an exception is set. + .. c:function:: int PyObject_SetAttrString(PyObject *o, const char *attr_name, PyObject *v) @@ -207,6 +214,10 @@ Object Protocol If *v* is ``NULL``, the attribute is deleted, but this feature is deprecated in favour of using :c:func:`PyObject_DelAttrString`. + The function must not be called with ``NULL`` *v* and an an exception set. + This case can arise from forgetting ``NULL`` checks and would delete the + attribute. + The number of different attribute names passed to this function should be kept small, usually by using a statically allocated string as *attr_name*. @@ -215,6 +226,10 @@ Object Protocol For more details, see :c:func:`PyUnicode_InternFromString`, which may be used internally to create a key object. + .. versionchanged:: next + Must not be called with NULL value if an exception is set. + + .. c:function:: int PyObject_GenericSetAttr(PyObject *o, PyObject *name, PyObject *value) Generic attribute setter and deleter function that is meant diff --git a/Lib/test/test_capi/test_abstract.py b/Lib/test/test_capi/test_abstract.py index 7d548ae87c0fa6..3a2ed9f5db82f0 100644 --- a/Lib/test/test_capi/test_abstract.py +++ b/Lib/test/test_capi/test_abstract.py @@ -1077,6 +1077,31 @@ def test_iter_nextitem(self): with self.assertRaisesRegex(TypeError, regex): PyIter_NextItem(10) + def test_object_setattr_null_exc(self): + class Obj: + pass + obj = Obj() + obj.attr = 123 + + exc = ValueError("error") + with self.assertRaises(SystemError) as cm: + _testcapi.object_setattr_null_exc(obj, 'attr', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + self.assertHasAttr(obj, 'attr') + + with self.assertRaises(SystemError) as cm: + _testcapi.object_setattrstring_null_exc(obj, 'attr', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + self.assertHasAttr(obj, 'attr') + + with self.assertRaises(SystemError) as cm: + # undecodable name + _testcapi.object_setattrstring_null_exc(obj, b'\xff', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2025-07-01-16-22-39.gh-issue-135075.angu3J.rst b/Misc/NEWS.d/next/C_API/2025-07-01-16-22-39.gh-issue-135075.angu3J.rst new file mode 100644 index 00000000000000..88e0fa65f45dd0 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-07-01-16-22-39.gh-issue-135075.angu3J.rst @@ -0,0 +1,2 @@ +Make :c:func:`PyObject_SetAttr` and :c:func:`PyObject_SetAttrString` fail if +called with ``NULL`` value and an exception set. Patch by Victor Stinner. diff --git a/Modules/_testcapi/abstract.c b/Modules/_testcapi/abstract.c index d4045afd515909..c1f769456accef 100644 --- a/Modules/_testcapi/abstract.c +++ b/Modules/_testcapi/abstract.c @@ -178,6 +178,42 @@ sequence_fast_get_item(PyObject *self, PyObject *args) } +static PyObject * +object_setattr_null_exc(PyObject *self, PyObject *args) +{ + PyObject *obj, *name, *exc; + if (!PyArg_ParseTuple(args, "OOO", &obj, &name, &exc)) { + return NULL; + } + + PyErr_SetObject((PyObject*)Py_TYPE(exc), exc); + if (PyObject_SetAttr(obj, name, NULL) < 0) { + return NULL; + } + assert(PyErr_Occurred()); + return NULL; +} + + +static PyObject * +object_setattrstring_null_exc(PyObject *self, PyObject *args) +{ + PyObject *obj, *exc; + const char *name; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "Oz#O", &obj, &name, &size, &exc)) { + return NULL; + } + + PyErr_SetObject((PyObject*)Py_TYPE(exc), exc); + if (PyObject_SetAttrString(obj, name, NULL) < 0) { + return NULL; + } + assert(PyErr_Occurred()); + return NULL; +} + + static PyMethodDef test_methods[] = { {"object_getoptionalattr", object_getoptionalattr, METH_VARARGS}, {"object_getoptionalattrstring", object_getoptionalattrstring, METH_VARARGS}, @@ -191,6 +227,8 @@ static PyMethodDef test_methods[] = { {"sequence_fast_get_size", sequence_fast_get_size, METH_O}, {"sequence_fast_get_item", sequence_fast_get_item, METH_VARARGS}, + {"object_setattr_null_exc", object_setattr_null_exc, METH_VARARGS}, + {"object_setattrstring_null_exc", object_setattrstring_null_exc, METH_VARARGS}, {NULL}, }; diff --git a/Objects/object.c b/Objects/object.c index 1223983753ac46..3ed7d55593dffa 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1213,16 +1213,27 @@ PyObject_HasAttrString(PyObject *obj, const char *name) int PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w) { - PyObject *s; - int res; + PyThreadState *tstate = _PyThreadState_GET(); + if (w == NULL && _PyErr_Occurred(tstate)) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetString(tstate, PyExc_SystemError, + "PyObject_SetAttrString() must not be called with NULL value " + "and an exception set"); + _PyErr_ChainExceptions1Tstate(tstate, exc); + return -1; + } - if (Py_TYPE(v)->tp_setattr != NULL) + if (Py_TYPE(v)->tp_setattr != NULL) { return (*Py_TYPE(v)->tp_setattr)(v, (char*)name, w); - s = PyUnicode_InternFromString(name); - if (s == NULL) + } + + PyObject *s = PyUnicode_InternFromString(name); + if (s == NULL) { return -1; - res = PyObject_SetAttr(v, s, w); - Py_XDECREF(s); + } + + int res = PyObject_SetAttr(v, s, w); + Py_DECREF(s); return res; } @@ -1440,6 +1451,16 @@ PyObject_HasAttr(PyObject *obj, PyObject *name) int PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) { + PyThreadState *tstate = _PyThreadState_GET(); + if (value == NULL && _PyErr_Occurred(tstate)) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetString(tstate, PyExc_SystemError, + "PyObject_SetAttr() must not be called with NULL value " + "and an exception set"); + _PyErr_ChainExceptions1Tstate(tstate, exc); + return -1; + } + PyTypeObject *tp = Py_TYPE(v); int err; @@ -1451,8 +1472,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) } Py_INCREF(name); - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyUnicode_InternMortal(interp, &name); + _PyUnicode_InternMortal(tstate->interp, &name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); From 938a5d7e62d962a8462bce9fe04236ac9a2155b8 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Thu, 3 Jul 2025 13:28:25 -0700 Subject: [PATCH 768/989] gh-135252: Document Zstandard integration across zipfile, shutil, and tarfile (#135311) Document Zstandard integration across zipfile, shutil, and tarfile --- Doc/library/compression.zstd.rst | 10 ++++++-- Doc/library/shutil.rst | 11 +++++--- Doc/library/tarfile.rst | 31 ++++++++++++++++++++-- Doc/library/zipfile.rst | 44 +++++++++++++++++++++++--------- 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst index 57ad8e3377fc67..a901403621b84f 100644 --- a/Doc/library/compression.zstd.rst +++ b/Doc/library/compression.zstd.rst @@ -523,8 +523,14 @@ Advanced parameter control .. attribute:: compression_level A high-level means of setting other compression parameters that affect - the speed and ratio of compressing data. Setting the level to zero uses - :attr:`COMPRESSION_LEVEL_DEFAULT`. + the speed and ratio of compressing data. + + Regular compression levels are greater than ``0``. Values greater than + ``20`` are considered "ultra" compression and require more memory than + other levels. Negative values can be used to trade off faster compression + for worse compression ratios. + + Setting the level to zero uses :attr:`COMPRESSION_LEVEL_DEFAULT`. .. attribute:: window_log diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2dde40c9d92f45..dde38498206c46 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -618,7 +618,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *format* is the archive format: one of "zip" (if the :mod:`zlib` module is available), "tar", "gztar" (if the :mod:`zlib` module is available), "bztar" (if the :mod:`bz2` module is - available), or "xztar" (if the :mod:`lzma` module is available). + available), "xztar" (if the :mod:`lzma` module is available), or "zstdtar" + (if the :mod:`compression.zstd` module is available). *root_dir* is a directory that will be the root directory of the archive, all paths in the archive will be relative to it; for example, @@ -673,6 +674,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own archiver for any existing formats, by using :func:`register_archive_format`. @@ -716,8 +719,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *extract_dir* is the name of the target directory where the archive is unpacked. If not provided, the current working directory is used. - *format* is the archive format: one of "zip", "tar", "gztar", "bztar", or - "xztar". Or any other format registered with + *format* is the archive format: one of "zip", "tar", "gztar", "bztar", + "xztar", or "zstdtar". Or any other format registered with :func:`register_unpack_format`. If not provided, :func:`unpack_archive` will use the archive file name extension and see if an unpacker was registered for that extension. In case none is found, @@ -789,6 +792,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own unpacker for any existing formats, by using :func:`register_unpack_format`. diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 7cec108a5bd41d..70466fbbc4d8f9 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -18,8 +18,8 @@ higher-level functions in :ref:`shutil `. Some facts and figures: -* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives - if the respective modules are available. +* reads and writes :mod:`gzip`, :mod:`bz2`, :mod:`compression.zstd`, and + :mod:`lzma` compressed archives if the respective modules are available. * read/write support for the POSIX.1-1988 (ustar) format. @@ -47,6 +47,10 @@ Some facts and figures: or paths outside of the destination. Previously, the filter strategy was equivalent to :func:`fully_trusted `. +.. versionchanged:: 3.14 + + Added support for Zstandard compression using :mod:`compression.zstd`. + .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) Return a :class:`TarFile` object for the pathname *name*. For detailed @@ -71,6 +75,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'r:xz'`` | Open for reading with lzma compression. | +------------------+---------------------------------------------+ + | ``'r:zst'`` | Open for reading with Zstandard compression.| + +------------------+---------------------------------------------+ | ``'x'`` or | Create a tarfile exclusively without | | ``'x:'`` | compression. | | | Raise a :exc:`FileExistsError` exception | @@ -88,6 +94,10 @@ Some facts and figures: | | Raise a :exc:`FileExistsError` exception | | | if it already exists. | +------------------+---------------------------------------------+ + | ``'x:zst'`` | Create a tarfile with Zstandard compression.| + | | Raise a :exc:`FileExistsError` exception | + | | if it already exists. | + +------------------+---------------------------------------------+ | ``'a' or 'a:'`` | Open for appending with no compression. The | | | file is created if it does not exist. | +------------------+---------------------------------------------+ @@ -99,6 +109,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'w:xz'`` | Open for lzma compressed writing. | +------------------+---------------------------------------------+ + | ``'w:zst'`` | Open for Zstandard compressed writing. | + +------------------+---------------------------------------------+ Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode* is not suitable to open a certain (compressed) file for reading, @@ -115,6 +127,15 @@ Some facts and figures: For modes ``'w:xz'``, ``'x:xz'`` and ``'w|xz'``, :func:`tarfile.open` accepts the keyword argument *preset* to specify the compression level of the file. + For modes ``'w:zst'``, ``'x:zst'`` and ``'w|zst'``, :func:`tarfile.open` + accepts the keyword argument *level* to specify the compression level of + the file. The keyword argument *options* may also be passed, providing + advanced Zstandard compression parameters described by + :class:`~compression.zstd.CompressionParameter`. The keyword argument + *zstd_dict* can be passed to provide a :class:`~compression.zstd.ZstdDict`, + a Zstandard dictionary used to improve compression of smaller amounts of + data. + For special purposes, there is a second format for *mode*: ``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile` object that processes its data as a stream of blocks. No random seeking will @@ -146,6 +167,9 @@ Some facts and figures: | ``'r|xz'`` | Open an lzma compressed *stream* for | | | reading. | +-------------+--------------------------------------------+ + | ``'r|zst'`` | Open a Zstandard compressed *stream* for | + | | reading. | + +-------------+--------------------------------------------+ | ``'w|'`` | Open an uncompressed *stream* for writing. | +-------------+--------------------------------------------+ | ``'w|gz'`` | Open a gzip compressed *stream* for | @@ -157,6 +181,9 @@ Some facts and figures: | ``'w|xz'`` | Open an lzma compressed *stream* for | | | writing. | +-------------+--------------------------------------------+ + | ``'w|zst'`` | Open a Zstandard compressed *stream* for | + | | writing. | + +-------------+--------------------------------------------+ .. versionchanged:: 3.5 The ``'x'`` (exclusive creation) mode was added. diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 6a4fa67332e179..bf9136a2139112 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -129,14 +129,28 @@ The module defines the following items: .. versionadded:: 3.3 +.. data:: ZIP_ZSTANDARD + + The numeric constant for Zstandard compression. This requires the + :mod:`compression.zstd` module. + .. note:: - The ZIP file format specification has included support for bzip2 compression - since 2001, and for LZMA compression since 2006. However, some tools - (including older Python releases) do not support these compression - methods, and may either refuse to process the ZIP file altogether, - or fail to extract individual files. + In APPNOTE 6.3.7, the method ID ``20`` was assigned to Zstandard + compression. This was changed in APPNOTE 6.3.8 to method ID ``93`` to + avoid conflicts, with method ID ``20`` being deprecated. For + compatibility, the :mod:`!zipfile` module reads both method IDs but will + only write data with method ID ``93``. + + .. versionadded:: 3.14 + +.. note:: + The ZIP file format specification has included support for bzip2 compression + since 2001, for LZMA compression since 2006, and Zstandard compression since + 2020. However, some tools (including older Python releases) do not support + these compression methods, and may either refuse to process the ZIP file + altogether, or fail to extract individual files. .. seealso:: @@ -176,10 +190,11 @@ ZipFile Objects *compression* is the ZIP compression method to use when writing the archive, and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, - :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized - values will cause :exc:`NotImplementedError` to be raised. If - :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified - but the corresponding module (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not + :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD`; + unrecognized values will cause :exc:`NotImplementedError` to be raised. If + :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or + :const:`ZIP_ZSTANDARD` is specified but the corresponding module + (:mod:`zlib`, :mod:`bz2`, :mod:`lzma`, or :mod:`compression.zstd`) is not available, :exc:`RuntimeError` is raised. The default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` (the default) zipfile will create ZIP files that @@ -194,6 +209,10 @@ ZipFile Objects (see :class:`zlib ` for more information). When using :const:`ZIP_BZIP2` integers ``1`` through ``9`` are accepted (see :class:`bz2 ` for more information). + When using :const:`ZIP_ZSTANDARD` integers ``-131072`` through ``22`` are + commonly accepted (see + :attr:`CompressionParameter.compression_level ` + for more on retrieving valid values and their meaning). The *strict_timestamps* argument, when set to ``False``, allows to zip files older than 1980-01-01 at the cost of setting the @@ -415,9 +434,10 @@ ZipFile Objects read or append. *pwd* is the password used for encrypted files as a :class:`bytes` object and, if specified, overrides the default password set with :meth:`setpassword`. Calling :meth:`read` on a ZipFile that uses a compression method other than - :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or - :const:`ZIP_LZMA` will raise a :exc:`NotImplementedError`. An error will also - be raised if the corresponding compression module is not available. + :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, + :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD` will raise a + :exc:`NotImplementedError`. An error will also be raised if the + corresponding compression module is not available. .. versionchanged:: 3.6 Calling :meth:`read` on a closed ZipFile will raise a :exc:`ValueError`. From 0243f97cbadec8d985e63b1daec5d1cbc850cae3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Jul 2025 23:33:02 +0300 Subject: [PATCH 769/989] gh-135661: Fix parsing start and end tags in HTMLParser according to the HTML5 standard (GH-135930) * Whitespaces no longer accepted between `` does not end the script section. * Vertical tabulation (`\v`) and non-ASCII whitespaces no longer recognized as whitespaces. The only whitespaces are `\t\n\r\f `. * Null character (U+0000) no longer ends the tag name. * Attributes and slashes after the tag name in end tags are now ignored, instead of terminating after the first `>` in quoted attribute value. E.g. ``. * Multiple slashes and whitespaces between the last attribute and closing `>` are now ignored in both start and end tags. E.g. ``. * Multiple `=` between attribute name and value are no longer collapsed. E.g. `` produces attribute "foo" with value "=bar". * Whitespaces between the `=` separator and attribute name or value are no longer ignored. E.g. `` produces two attributes "foo" and "=bar", both with value None; `` produces two attributes: "foo" with value "" and "bar" with value None. * Fix Sphinx errors. * Apply suggestions from code review Co-authored-by: Ezio Melotti * Address review comments. * Move to Security. --------- Co-authored-by: Ezio Melotti --- Lib/html/parser.py | 143 ++++++++-------- Lib/test/test_htmlparser.py | 155 +++++++++++------- ...-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 25 +++ 3 files changed, 194 insertions(+), 129 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index ba416e7fa6e3fe..cc15de07b5bae6 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -31,15 +31,43 @@ piclose = re.compile('>') commentclose = re.compile(r'--\s*>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + (= # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:= # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? +""", re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. locatestarttagend_tolerant = re.compile(r""" <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name @@ -56,8 +84,6 @@ \s* # trailing whitespace """, re.VERBOSE) endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') # Character reference processing logic specific to attribute values @@ -141,7 +167,8 @@ def get_starttag_text(self): def set_cdata_mode(self, elem): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal @@ -166,7 +193,7 @@ def goahead(self, end): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -310,7 +337,7 @@ def parse_html_declaration(self, i): return self.parse_bogus_comment(i) # Internal -- parse bogus comment, return length or -1 if not terminated - # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state + # see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state def parse_bogus_comment(self, i, report=1): rawdata = self.rawdata assert rawdata[i:i+2] in ('": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - if j > i: - return j - else: - return i + 1 - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") + match = locatetagend.match(rawdata, i+1) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 + return j # Internal -- parse endtag, return end or -1 if incomplete def parse_endtag(self, i): + # See the HTML5 specs section "13.2.5.7 End tag open state" + # https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state rawdata = self.rawdata assert rawdata[i:i+2] == " - if not match: + if rawdata.find('>', i+2) < 0: # fast check return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind_tolerant.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group(1).lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like - # , but looking for > after the name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 + if not endtagopen.match(rawdata, i): # ': # is ignored + # "missing-end-tag-name" parser error + return i+3 + else: + return self.parse_bogus_comment(i) - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos + match = locatetagend.match(rawdata, i+2) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 - self.handle_endtag(elem) + # find the name: "13.2.5.8 Tag name state" + # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state + match = tagfind_tolerant.match(rawdata, i+2) + assert match + tag = match.group(1).lower() + self.handle_endtag(tag) self.clear_cdata_mode() - return gtpos + return j # Overridable -- finish processing of start+end tag: def handle_startendtag(self, tag, attrs): diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 65a4bee72b9775..d0d2c54217ccaf 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -81,6 +81,13 @@ def handle_entityref(self, data): self.fail('This should never be called with convert_charrefs=True') +# The normal event collector normalizes the events in get_events, +# so we override it to return the original list of events. +class EventCollectorNoNormalize(EventCollector): + def get_events(self): + return self.events + + class TestCaseBase(unittest.TestCase): def get_collector(self): @@ -265,8 +272,7 @@ def test_get_starttag_text(self): ("starttag", "foo:bar", [("one", "1"), ("two", "2")]), ("starttag_text", s)]) - def test_cdata_content(self): - contents = [ + @support.subTests('content', [ ' ¬-an-entity-ref;', "", '

', @@ -279,54 +285,83 @@ def test_cdata_content(self): 'src="http://www.example.org/r=\'+new ' 'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'), '\n\n', - 'foo = "";', '', - # these two should be invalid according to the HTML 5 spec, - # section 8.1.2.2 - #'foo = ', - #'foo = ', - ] - elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style'] - for content in contents: - for element in elements: - element_lower = element.lower() - s = '<{element}>{content}'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)]) - - def test_cdata_with_closing_tags(self): + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + ]) + def test_script_content(self, content): + s = f'' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")]) + + @support.subTests('content', [ + 'a::before { content: ""; }', + 'a::before { content: "¬-an-entity-ref;"; }', + 'a::before { content: ""; }', + 'a::before { content: "\u2603"; }', + 'a::before { content: "< /style>"; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + ]) + def test_style_content(self, content): + s = f'' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")]) + + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', + 'script/', 'script foo=bar', 'script foo=">"']) + def test_script_closing_tag(self, endtag): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. - # The normal event collector normalizes the events in get_events, - # so we override it to return the original list of events. - class Collector(EventCollector): - def get_events(self): - return self.events - content = """ ¬-an-entity-ref;

''""" - for element in [' script', 'script ', ' script ', - '\nscript', 'script\n', '\nscript\n']: - element_lower = element.lower().strip() - s = '{content}{tail}' + self._run_check(s, [("starttag", "script", []), + ("data", content if end else content + tail)], + collector=EventCollectorNoNormalize(convert_charrefs=False)) def test_comments(self): html = ("" @@ -443,7 +478,7 @@ def test_starttag_junk_chars(self): self._run_check("", [('comment', '$')]) self._run_check("", [('endtag', 'a')]) + self._run_check("", [('comment', ' a')]) self._run_check("", [('starttag', 'a", [('endtag', 'a', [('endtag', 'a')]) + self._run_check('', [('endtag', 'a')]) + def test_declaration_junk_chars(self): self._run_check("", [('decl', 'DOCTYPE foo $ ')]) @@ -525,15 +564,11 @@ def test_invalid_end_tags(self): self._run_check(html, expected) def test_broken_invalid_end_tag(self): - # This is technically wrong (the "> shouldn't be included in the 'data') - # but is probably not worth fixing it (in addition to all the cases of - # the previous test, it would require a full attribute parsing). - # see #13993 html = 'This confuses the parser' expected = [('starttag', 'b', []), ('data', 'This'), ('endtag', 'b'), - ('data', '"> confuses the parser')] + ('data', ' confuses the parser')] self._run_check(html, expected) def test_correct_detection_of_start_tags(self): @@ -560,7 +595,7 @@ def test_correct_detection_of_start_tags(self): html = '

The rain' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -749,9 +784,15 @@ def test_attr_syntax(self): ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) ] self._run_check("""""", output) - self._run_check("""""", output) - self._run_check("""""", output) - self._run_check("""""", output) + self._run_check("", [('starttag', 'a', [('foo', '=bar')])]) + self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo\v', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo\xa0', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', '\vbar')])]) + self._run_check("", [('starttag', 'a', [('foo', '\xa0bar')])]) def test_attr_values(self): self._run_check("""""", @@ -760,6 +801,10 @@ def test_attr_values(self): ("d", "\txyz\n")])]) self._run_check("""""", [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. self._run_check("", [("starttag", "e", [("a", "rgb(1,2,3)")])]) @@ -831,7 +876,7 @@ def test_malformed_attributes(self): ('data', 'test - bad2'), ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), ('data', 'test - bad4'), ('endtag', 'a') ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst new file mode 100644 index 00000000000000..b6f9e104e44047 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -0,0 +1,25 @@ +Fix parsing start and end tags in :class:`html.parser.HTMLParser` +according to the HTML5 standard. + +* Whitespaces no longer accepted between ```` does not end the script section. + +* Vertical tabulation (``\v``) and non-ASCII whitespaces no longer recognized + as whitespaces. The only whitespaces are ``\t\n\r\f`` and space. + +* Null character (U+0000) no longer ends the tag name. + +* Attributes and slashes after the tag name in end tags are now ignored, + instead of terminating after the first ``>`` in quoted attribute value. + E.g. ````. + +* Multiple slashes and whitespaces between the last attribute and closing ``>`` + are now ignored in both start and end tags. E.g. ````. + +* Multiple ``=`` between attribute name and value are no longer collapsed. + E.g. ```` produces attribute "foo" with value "=bar". + +* Whitespaces between the ``=`` separator and attribute name or value are no + longer ignored. E.g. ```` produces two attributes "foo" and + "=bar", both with value None; ```` produces two attributes: + "foo" with value "" and "bar" with value None. From b4991056f4f44acb50aea3838463e749c24c7420 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 3 Jul 2025 14:04:01 -0700 Subject: [PATCH 770/989] Clarify some details regarding `sys.monitoring` (#133981) --- Doc/library/sys.monitoring.rst | 61 ++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 0674074b8c0df6..f62a4011e4144b 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -137,7 +137,8 @@ The following events are supported: .. monitoring-event:: PY_UNWIND - Exit from a Python function during exception unwinding. + Exit from a Python function during exception unwinding. This includes exceptions raised directly within the + function and that are allowed to continue to propagate. .. monitoring-event:: PY_YIELD @@ -171,7 +172,7 @@ events, use the expression ``PY_RETURN | PY_START``. if get_events(DEBUGGER_ID) == NO_EVENTS: ... -Events are divided into three groups: + Setting this event deactivates all events. .. _monitoring-event-local: @@ -243,20 +244,23 @@ raise an exception unless it would be visible to other code. To allow tools to monitor for real exceptions without slowing down generators and coroutines, the :monitoring-event:`STOP_ITERATION` event is provided. -:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike :monitoring-event:`RAISE`. +:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike +:monitoring-event:`RAISE`. -Note that the :monitoring-event:`STOP_ITERATION` event and the :monitoring-event:`RAISE` -event for a :exc:`StopIteration` exception are equivalent, and are treated as interchangeable -when generating events. Implementations will favor :monitoring-event:`STOP_ITERATION` for -performance reasons, but may generate a :monitoring-event:`RAISE` event with a :exc:`StopIteration`. +Note that the :monitoring-event:`STOP_ITERATION` event and the +:monitoring-event:`RAISE` event for a :exc:`StopIteration` exception are +equivalent, and are treated as interchangeable when generating events. +Implementations will favor :monitoring-event:`STOP_ITERATION` for performance +reasons, but may generate a :monitoring-event:`RAISE` event with a +:exc:`StopIteration`. Turning events on and off ------------------------- In order to monitor an event, it must be turned on and a corresponding callback -must be registered. -Events can be turned on or off by setting the events either globally or -for a particular code object. +must be registered. Events can be turned on or off by setting the events either +globally and/or for a particular code object. An event will trigger only once, +even if it is turned on both globally and locally. Setting events globally @@ -292,10 +296,6 @@ in Python (see :ref:`c-api-monitoring`). Activates all the local events for *code* which are set in *event_set*. Raises a :exc:`ValueError` if *tool_id* is not in use. -Local events add to global events, but do not mask them. -In other words, all global events will trigger for a code object, -regardless of the local events. - Disabling events '''''''''''''''' @@ -325,8 +325,6 @@ except for a few breakpoints. Registering callback functions ------------------------------ -To register a callable for events call - .. function:: register_callback(tool_id: int, event: int, func: Callable | None, /) -> Callable | None Registers the callable *func* for the *event* with the given *tool_id* @@ -335,12 +333,16 @@ To register a callable for events call it is unregistered and returned. Otherwise :func:`register_callback` returns ``None``. - Functions can be unregistered by calling ``sys.monitoring.register_callback(tool_id, event, None)``. Callback functions can be registered and unregistered at any time. +Callbacks are called only once regardless if the event is turned on both +globally and locally. As such, if an event could be turned on for both global +and local events by your code then the callback needs to be written to handle +either trigger. + Registering or unregistering a callback function will generate a :func:`sys.audit` event. @@ -353,37 +355,46 @@ Callback function arguments that there are no arguments to the call. When an active event occurs, the registered callback function is called. +Callback functions returning an object other than :data:`DISABLE` will have no effect. Different events will provide the callback function with different arguments, as follows: * :monitoring-event:`PY_START` and :monitoring-event:`PY_RESUME`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object * :monitoring-event:`PY_RETURN` and :monitoring-event:`PY_YIELD`:: - func(code: CodeType, instruction_offset: int, retval: object) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, retval: object) -> object -* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN`:: +* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN` + (*arg0* can be :data:`MISSING` specifically):: - func(code: CodeType, instruction_offset: int, callable: object, arg0: object | MISSING) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, callable: object, arg0: object) -> object + *code* represents the code object where the call is being made, while + *callable* is the object that is about to be called (and thus + triggered the event). If there are no arguments, *arg0* is set to :data:`sys.monitoring.MISSING`. + For instance methods, *callable* will be the function object as found on the + class with *arg0* set to the instance (i.e. the ``self`` argument to the + method). + * :monitoring-event:`RAISE`, :monitoring-event:`RERAISE`, :monitoring-event:`EXCEPTION_HANDLED`, :monitoring-event:`PY_UNWIND`, :monitoring-event:`PY_THROW` and :monitoring-event:`STOP_ITERATION`:: - func(code: CodeType, instruction_offset: int, exception: BaseException) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, exception: BaseException) -> object * :monitoring-event:`LINE`:: - func(code: CodeType, line_number: int) -> DISABLE | Any + func(code: CodeType, line_number: int) -> object * :monitoring-event:`BRANCH_LEFT`, :monitoring-event:`BRANCH_RIGHT` and :monitoring-event:`JUMP`:: - func(code: CodeType, instruction_offset: int, destination_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, destination_offset: int) -> object Note that the *destination_offset* is where the code will next execute. * :monitoring-event:`INSTRUCTION`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object From 48cb9b61121f2ad58c2754faa1e92a20c47524e4 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 3 Jul 2025 19:27:21 -0700 Subject: [PATCH 771/989] gh-133982: Test _pyio.BytesIO in free-threaded tests (gh-136218) --- Doc/library/io.rst | 3 + Lib/_pyio.py | 80 ++++++++++++------- Lib/test/test_free_threading/test_io.py | 42 ++++++---- Lib/test/test_io.py | 1 + ...-07-02-18-41-45.gh-issue-133982.7qqAn6.rst | 1 + 5 files changed, 80 insertions(+), 47 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst diff --git a/Doc/library/io.rst b/Doc/library/io.rst index de5cab5aee649f..dfebccb5a9cb91 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -719,6 +719,9 @@ than raw I/O does. The optional argument *initial_bytes* is a :term:`bytes-like object` that contains initial data. + Methods may be used from multiple threads without external locking in + :term:`free threading` builds. + :class:`BytesIO` provides or overrides these methods in addition to those from :class:`BufferedIOBase` and :class:`IOBase`: diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fb2a6d049caab6..5db8ce9244b5ba 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -876,16 +876,28 @@ class BytesIO(BufferedIOBase): _buffer = None def __init__(self, initial_bytes=None): + # Use to keep self._buffer and self._pos consistent. + self._lock = Lock() + buf = bytearray() if initial_bytes is not None: buf += initial_bytes - self._buffer = buf - self._pos = 0 + + with self._lock: + self._buffer = buf + self._pos = 0 def __getstate__(self): if self.closed: raise ValueError("__getstate__ on closed file") - return self.__dict__.copy() + with self._lock: + state = self.__dict__.copy() + del state['_lock'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._lock = Lock() def getvalue(self): """Return the bytes value (contents) of the buffer @@ -918,14 +930,16 @@ def read(self, size=-1): raise TypeError(f"{size!r} is not an integer") else: size = size_index() - if size < 0: - size = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + size) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) + + with self._lock: + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) def read1(self, size=-1): """This is the same as read. @@ -941,12 +955,14 @@ def write(self, b): n = view.nbytes # Size of any bytes-like object if n == 0: return 0 - pos = self._pos - if pos > len(self._buffer): - # Pad buffer to pos with null bytes. - self._buffer.resize(pos) - self._buffer[pos:pos + n] = b - self._pos += n + + with self._lock: + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = b + self._pos += n return n def seek(self, pos, whence=0): @@ -963,9 +979,11 @@ def seek(self, pos, whence=0): raise ValueError("negative seek position %r" % (pos,)) self._pos = pos elif whence == 1: - self._pos = max(0, self._pos + pos) + with self._lock: + self._pos = max(0, self._pos + pos) elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) + with self._lock: + self._pos = max(0, len(self._buffer) + pos) else: raise ValueError("unsupported whence value") return self._pos @@ -978,18 +996,20 @@ def tell(self): def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - else: - try: - pos_index = pos.__index__ - except AttributeError: - raise TypeError(f"{pos!r} is not an integer") + + with self._lock: + if pos is None: + pos = self._pos else: - pos = pos_index() - if pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] return pos def readable(self): diff --git a/Lib/test/test_free_threading/test_io.py b/Lib/test/test_free_threading/test_io.py index f9bec740ddff50..41d89e04da8716 100644 --- a/Lib/test/test_free_threading/test_io.py +++ b/Lib/test/test_free_threading/test_io.py @@ -1,12 +1,13 @@ +import io +import _pyio as pyio import threading from unittest import TestCase from test.support import threading_helper from random import randint -from io import BytesIO from sys import getsizeof -class TestBytesIO(TestCase): +class ThreadSafetyMixin: # Test pretty much everything that can break under free-threading. # Non-deterministic, but at least one of these things will fail if # BytesIO object is not free-thread safe. @@ -90,20 +91,27 @@ def sizeof(barrier, b, *ignore): barrier.wait() getsizeof(b) - self.check([write] * 10, BytesIO()) - self.check([writelines] * 10, BytesIO()) - self.check([write] * 10 + [truncate] * 10, BytesIO()) - self.check([truncate] + [read] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [read1] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [readline] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readlines] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readinto] * 10, BytesIO(b'0\n'*204800), bytearray(b'0\n'*204800)) - self.check([close] + [write] * 10, BytesIO()) - self.check([truncate] + [getvalue] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [getbuffer] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [iter] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [getstate] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [setstate] * 10, BytesIO(b'0\n'*204800), (b'123', 0, None)) - self.check([truncate] + [sizeof] * 10, BytesIO(b'0\n'*204800)) + self.check([write] * 10, self.ioclass()) + self.check([writelines] * 10, self.ioclass()) + self.check([write] * 10 + [truncate] * 10, self.ioclass()) + self.check([truncate] + [read] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [read1] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800)) + self.check([close] + [write] * 10, self.ioclass()) + self.check([truncate] + [getvalue] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [iter] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [getstate] * 10, self.ioclass(b'0\n'*204800)) + state = self.ioclass(b'123').__getstate__() + self.check([truncate] + [setstate] * 10, self.ioclass(b'0\n'*204800), state) + self.check([truncate] + [sizeof] * 10, self.ioclass(b'0\n'*204800)) # no tests for seek or tell because they don't break anything + +class CBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = io.BytesIO + +class PyBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = pyio.BytesIO diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0c921ffbc2576a..b487bcabf01ca4 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -9,6 +9,7 @@ # * test_univnewlines - tests universal newline support # * test_largefile - tests operations on a file greater than 2**32 bytes # (only enabled with -ulargefile) +# * test_free_threading/test_io - tests thread safety of io objects ################################################################################ # ATTENTION TEST WRITERS!!! diff --git a/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst b/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst new file mode 100644 index 00000000000000..a2d0810cebeaa6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-02-18-41-45.gh-issue-133982.7qqAn6.rst @@ -0,0 +1 @@ +Update Python implementation of :class:`io.BytesIO` to be thread safe. From 85f092f541a2caf47e77b8510cbc720216e91e63 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 4 Jul 2025 09:04:47 +0530 Subject: [PATCH 772/989] gh-115999: remove redundant check in free-threading from `_STORE_ATTR_WITH_HINT` (#136249) --- Python/bytecodes.c | 6 ------ Python/executor_cases.c.h | 9 --------- Python/generated_cases.c.h | 10 ---------- 3 files changed, 25 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a5b74d88d7d4fe..d9abc4c53d1f50 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2641,12 +2641,6 @@ dummy_func( PyDictObject *dict = _PyObject_GetManagedDict(owner_o); DEOPT_IF(dict == NULL); DEOPT_IF(!LOCK_OBJECT(dict)); - #ifdef Py_GIL_DISABLED - if (dict != _PyObject_GetManagedDict(owner_o)) { - UNLOCK_OBJECT(dict); - DEOPT_IF(true); - } - #endif assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); if (hint >= (size_t)dict->ma_keys->dk_nentries || diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 276c320c5f4d90..e152865e4ec9e8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3704,15 +3704,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #ifdef Py_GIL_DISABLED - if (dict != _PyObject_GetManagedDict(owner_o)) { - UNLOCK_OBJECT(dict); - if (true) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } - #endif assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); if (hint >= (size_t)dict->ma_keys->dk_nentries || diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bb153bc1c0e11a..aa1eb373b7ba4b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11064,16 +11064,6 @@ assert(_PyOpcode_Deopt[opcode] == (STORE_ATTR)); JUMP_TO_PREDICTED(STORE_ATTR); } - #ifdef Py_GIL_DISABLED - if (dict != _PyObject_GetManagedDict(owner_o)) { - UNLOCK_OBJECT(dict); - if (true) { - UPDATE_MISS_STATS(STORE_ATTR); - assert(_PyOpcode_Deopt[opcode] == (STORE_ATTR)); - JUMP_TO_PREDICTED(STORE_ATTR); - } - } - #endif assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); if (hint >= (size_t)dict->ma_keys->dk_nentries || From b582d751b4968d4534fdb7894e50713676789b2f Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 4 Jul 2025 09:18:55 +0530 Subject: [PATCH 773/989] gh-129824: fix data races in subinterpreters under TSAN (#135794) This fixes the data races in typeobject.c in subinterpreters under free-threading. The type flags and slots are only modified in the main interpreter as all static types are first initialised in main interpreter. --- Objects/typeobject.c | 61 ++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6e7471cb5941a7..e84278d13c3e9c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -54,7 +54,6 @@ class object "PyObject *" "&PyBaseObject_Type" PyUnicode_CheckExact(name) && \ (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE) -#define NEXT_GLOBAL_VERSION_TAG _PyRuntime.types.next_version_tag #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag @@ -266,8 +265,8 @@ static_ext_type_lookup(PyInterpreterState *interp, size_t index, assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); size_t full_index = index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; - int64_t interp_count = - _PyRuntime.types.managed_static.types[full_index].interp_count; + int64_t interp_count = _Py_atomic_load_int64( + &_PyRuntime.types.managed_static.types[full_index].interp_count); assert((interp_count == 0) == (_PyRuntime.types.managed_static.types[full_index].type == NULL)); *p_interp_count = interp_count; @@ -344,7 +343,7 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, : index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; assert((initial == 1) == - (_PyRuntime.types.managed_static.types[full_index].interp_count == 0)); + (_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) == 0)); (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, 1); @@ -393,7 +392,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, : &(interp->types.for_extensions.initialized[index]); assert(state != NULL); - assert(_PyRuntime.types.managed_static.types[full_index].interp_count > 0); + assert(_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) > 0); assert(_PyRuntime.types.managed_static.types[full_index].type == state->type); assert(state->type != NULL); @@ -403,7 +402,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, -1); if (final) { - assert(!_PyRuntime.types.managed_static.types[full_index].interp_count); + assert(!_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count)); _PyRuntime.types.managed_static.types[full_index].type = NULL; managed_static_type_index_clear(self); @@ -1359,6 +1358,19 @@ _PyType_LookupByVersion(unsigned int version) #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" #endif +static inline unsigned int +next_global_version_tag(void) +{ + unsigned int old; + do { + old = _Py_atomic_load_uint_relaxed(&_PyRuntime.types.next_version_tag); + if (old >= _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint(&_PyRuntime.types.next_version_tag, &old, old + 1)); + return old + 1; +} + static int assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) { @@ -1389,11 +1401,12 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) } if (type->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) { /* static types */ - if (NEXT_GLOBAL_VERSION_TAG > _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + unsigned int next_version_tag = next_global_version_tag(); + if (next_version_tag == 0) { /* We have run out of version numbers */ return 0; } - set_version_unlocked(type, NEXT_GLOBAL_VERSION_TAG++); + set_version_unlocked(type, next_version_tag); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -9007,7 +9020,11 @@ type_ready_set_new(PyTypeObject *type, int initial) && base == &PyBaseObject_Type && !(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { - type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + if (initial) { + type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + } else { + assert(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION); + } } if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { @@ -9021,13 +9038,17 @@ type_ready_set_new(PyTypeObject *type, int initial) } } else { - // tp_new is NULL: inherit tp_new from base - type->tp_new = base->tp_new; + if (initial) { + // tp_new is NULL: inherit tp_new from base + type->tp_new = base->tp_new; + } } } else { // Py_TPFLAGS_DISALLOW_INSTANTIATION sets tp_new to NULL - type->tp_new = NULL; + if (initial) { + type->tp_new = NULL; + } } return 0; } @@ -9160,7 +9181,12 @@ type_ready(PyTypeObject *type, int initial) } /* All done -- set the ready flag */ - type_add_flags(type, Py_TPFLAGS_READY); + if (initial) { + type_add_flags(type, Py_TPFLAGS_READY); + } else { + assert(type->tp_flags & Py_TPFLAGS_READY); + } + stop_readying(type); assert(_PyType_CheckConsistency(type)); @@ -9209,15 +9235,16 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_DICT)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF)); - if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { - assert(initial); + if (initial) { + assert((self->tp_flags & Py_TPFLAGS_READY) == 0); type_add_flags(self, _Py_TPFLAGS_STATIC_BUILTIN); type_add_flags(self, Py_TPFLAGS_IMMUTABLETYPE); - assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); if (self->tp_version_tag == 0) { - _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++); + unsigned int next_version_tag = next_global_version_tag(); + assert(next_version_tag != 0); + _PyType_SetVersion(self, next_version_tag); } } else { From 8ac7613dc8b8f82253d7c0e2b6ef6ed703a0a1ee Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 4 Jul 2025 10:00:23 +0300 Subject: [PATCH 774/989] gh-102555: Fix comment parsing in HTMLParser according to the HTML5 standard (GH-135664) * "--!>" now ends the comment. * "-- >" no longer ends the comment. * Support abnormally ended empty comments "<-->" and "<--->". --------- Co-author: Kerim Kabirov Co-authored-by: Ezio Melotti --- Lib/html/parser.py | 18 ++++++++++- Lib/test/test_htmlparser.py | 32 +++++++++++++++++-- ...-06-18-13-28-08.gh-issue-102555.nADrzJ.rst | 3 ++ 3 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-18-13-28-08.gh-issue-102555.nADrzJ.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index cc15de07b5bae6..9b4f09599134bd 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -29,7 +29,8 @@ starttagopen = re.compile('<[a-zA-Z]') endtagopen = re.compile('') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: # 1) if you change tagfind/attrfind remember to update locatetagend too; # 2) if you change tagfind/attrfind and/or locatetagend the parser will @@ -336,6 +337,21 @@ def parse_html_declaration(self, i): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith('" '' '' + '' '' + # abrupt-closing-of-empty-comment + '' + '' '' '' - '') + '' + '' + '' + '' + '' + '' + '' + # nested-comment + ' -->' + '' + '' + ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), + ('comment', '-'), + ('comment', ''), + ('comment', ''), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', 'incorrectly-closed-comment'), + ('comment', ''), + ('comment', '--!'), + ('comment', '-- >'), + ('comment', '-!>'), + ('comment', '!>'), + ('comment', ' '), + ('comment', '`` now ends the comment. ``-- >`` no longer ends the +comment. Support abnormally ended empty comments ``<-->`` and ``<--->``. From 93263d43141a81d369adfcddf325f9a54cb5766d Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Fri, 4 Jul 2025 06:44:37 -0700 Subject: [PATCH 775/989] gh-135773: have pyvenv.cfg without home key anchor a venv and deduce home (#135831) This is still formally undefined behaviour, but we may as well keep the *same* undefined behaviour as previous versions. PEP 796 proposes a cleaner and more consistent replacement for 3.15+ --- Lib/test/test_getpath.py | 21 +++++++++++++++++++++ Modules/getpath.py | 11 ++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_getpath.py b/Lib/test/test_getpath.py index f86df9d0d03485..83f09f3495547a 100644 --- a/Lib/test/test_getpath.py +++ b/Lib/test/test_getpath.py @@ -354,6 +354,27 @@ def test_venv_posix(self): actual = getpath(ns, expected) self.assertEqual(expected, actual) + def test_venv_posix_without_home_key(self): + ns = MockPosixNamespace( + argv0="/venv/bin/python3", + PREFIX="/usr", + ENV_PATH="/usr/bin", + ) + # Setup the bare minimum venv + ns.add_known_xfile("/usr/bin/python3") + ns.add_known_xfile("/venv/bin/python3") + ns.add_known_link("/venv/bin/python3", "/usr/bin/python3") + ns.add_known_file("/venv/pyvenv.cfg", [ + # home = key intentionally omitted + ]) + expected = dict( + executable="/venv/bin/python3", + prefix="/venv", + base_prefix="/usr", + ) + actual = getpath(ns, expected) + self.assertEqual(expected, actual) + def test_venv_changed_name_posix(self): "Test a venv layout on *nix." ns = MockPosixNamespace( diff --git a/Modules/getpath.py b/Modules/getpath.py index be2210345afbda..b89d7427e3febd 100644 --- a/Modules/getpath.py +++ b/Modules/getpath.py @@ -364,10 +364,9 @@ def search_up(prefix, *landmarks, test=isfile): venv_prefix = None pyvenvcfg = [] - # Search for the 'home' key in pyvenv.cfg. Currently, we don't consider the - # presence of a pyvenv.cfg file without a 'home' key to signify the - # existence of a virtual environment — we quietly ignore them. - # XXX: If we don't find a 'home' key, we don't look for another pyvenv.cfg! + # Search for the 'home' key in pyvenv.cfg. If a home key isn't found, + # then it means a venv is active and home is based on the venv's + # executable (if its a symlink, home is where the symlink points). for line in pyvenvcfg: key, had_equ, value = line.partition('=') if had_equ and key.strip().lower() == 'home': @@ -412,10 +411,8 @@ def search_up(prefix, *landmarks, test=isfile): if isfile(candidate): base_executable = candidate break + # home key found; stop iterating over lines break - else: - # We didn't find a 'home' key in pyvenv.cfg (no break), reset venv_prefix. - venv_prefix = None # ****************************************************************************** From ade19880943509945da193202ca89e0b2b6fbd75 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 4 Jul 2025 11:40:32 -0300 Subject: [PATCH 776/989] Docs: Move "or" outside monospace syntax in `tarfile.rst` (GH-136263) --- Doc/library/tarfile.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 70466fbbc4d8f9..99e8ef7b886035 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -63,8 +63,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | mode | action | +==================+=============================================+ - | ``'r' or 'r:*'`` | Open for reading with transparent | - | | compression (recommended). | + | ``'r'`` or | Open for reading with transparent | + | ``'r:*'`` | compression (recommended). | +------------------+---------------------------------------------+ | ``'r:'`` | Open for reading exclusively without | | | compression. | @@ -98,10 +98,11 @@ Some facts and figures: | | Raise a :exc:`FileExistsError` exception | | | if it already exists. | +------------------+---------------------------------------------+ - | ``'a' or 'a:'`` | Open for appending with no compression. The | - | | file is created if it does not exist. | + | ``'a'`` or | Open for appending with no compression. The | + | ``'a:'`` | file is created if it does not exist. | +------------------+---------------------------------------------+ - | ``'w' or 'w:'`` | Open for uncompressed writing. | + | ``'w'`` or | Open for uncompressed writing. | + | ``'w:'`` | | +------------------+---------------------------------------------+ | ``'w:gz'`` | Open for gzip compressed writing. | +------------------+---------------------------------------------+ From d1d5dce14f90d777608e4403d09079421ff55944 Mon Sep 17 00:00:00 2001 From: William S Fulton Date: Fri, 4 Jul 2025 16:54:00 +0100 Subject: [PATCH 777/989] gh-136288: Fix error message in `_testcapi/vectorcall.c` (GH-136258) Use the %N format specifier instead of %s and `PyType_GetName`. --- Modules/_testcapi/vectorcall.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_testcapi/vectorcall.c b/Modules/_testcapi/vectorcall.c index 03aaacb328e0b6..f89dcb6c4cf03c 100644 --- a/Modules/_testcapi/vectorcall.c +++ b/Modules/_testcapi/vectorcall.c @@ -179,14 +179,14 @@ _testcapi_VectorCallClass_set_vectorcall_impl(PyObject *self, if (!PyObject_TypeCheck(self, type)) { return PyErr_Format( PyExc_TypeError, - "expected %s instance", - PyType_GetName(type)); + "expected %N instance", + type); } if (!type->tp_vectorcall_offset) { return PyErr_Format( PyExc_TypeError, - "type %s has no vectorcall offset", - PyType_GetName(type)); + "type %N has no vectorcall offset", + type); } *(vectorcallfunc*)((char*)self + type->tp_vectorcall_offset) = ( VectorCallClass_vectorcall); From 5de7e3f9739b01ad180fffb242ac57cea930e74d Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 5 Jul 2025 09:14:40 +0300 Subject: [PATCH 778/989] gh-136297: Test all `pickle` protocols in `test_zoneinfo_property.py` (#136298) --- .../test_zoneinfo/test_zoneinfo_property.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo_property.py b/Lib/test/test_zoneinfo/test_zoneinfo_property.py index feaa77f3e7f0b9..294c7e9b27a857 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo_property.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo_property.py @@ -146,20 +146,24 @@ def setUp(self): @add_key_examples def test_pickle_unpickle_cache(self, key): zi = self.klass(key) - pkl_str = pickle.dumps(zi) - zi_rt = pickle.loads(pkl_str) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIs(zi, zi_rt) + self.assertIs(zi, zi_rt) @hypothesis.given(key=valid_keys()) @add_key_examples def test_pickle_unpickle_no_cache(self, key): zi = self.klass.no_cache(key) - pkl_str = pickle.dumps(zi) - zi_rt = pickle.loads(pkl_str) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIsNot(zi, zi_rt) - self.assertEqual(str(zi), str(zi_rt)) + self.assertIsNot(zi, zi_rt) + self.assertEqual(str(zi), str(zi_rt)) @hypothesis.given(key=valid_keys()) @add_key_examples From f0c7344a8fbfe67c05981cce67562e7facfae73d Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Sat, 5 Jul 2025 21:29:02 +0800 Subject: [PATCH 779/989] gh-101100: Fix references in `http.cookiejar` docs (GH-136238) --- Doc/library/http.cookiejar.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst index 23ddecf873876d..251aea891c3f98 100644 --- a/Doc/library/http.cookiejar.rst +++ b/Doc/library/http.cookiejar.rst @@ -570,7 +570,7 @@ Netscape protocol strictness switches: Don't allow setting cookies whose path doesn't path-match request URI. -:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by +:attr:`~DefaultCookiePolicy.strict_ns_domain` is a collection of flags. Its value is constructed by or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means both flags are set). From 5b56daa9d728fa38a1fb6d8a823d795081f067d8 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Sat, 5 Jul 2025 15:55:39 +0200 Subject: [PATCH 780/989] gh-130870: Preserve `GenericAlias` subclasses in `typing.get_type_hints()` (#131583) --- Lib/test/test_typing.py | 28 +++++++++++++++++-- Lib/typing.py | 26 ++++++++++------- ...-06-10-10-22-18.gh-issue-130870.JipqbO.rst | 2 ++ 3 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-10-10-22-18.gh-issue-130870.JipqbO.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index ef02e8202fc829..bef6773ad6cb2f 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -1605,7 +1605,10 @@ def func1(*args: *Ts): pass self.assertEqual(gth(func1), {'args': Unpack[Ts]}) def func2(*args: *tuple[int, str]): pass - self.assertEqual(gth(func2), {'args': Unpack[tuple[int, str]]}) + hint = gth(func2)['args'] + self.assertIsInstance(hint, types.GenericAlias) + self.assertEqual(hint.__args__[0], int) + self.assertIs(hint.__unpacked__, True) class CustomVariadic(Generic[*Ts]): pass @@ -1620,7 +1623,10 @@ def func1(*args: '*Ts'): pass {'args': Unpack[Ts]}) def func2(*args: '*tuple[int, str]'): pass - self.assertEqual(gth(func2), {'args': Unpack[tuple[int, str]]}) + hint = gth(func2)['args'] + self.assertIsInstance(hint, types.GenericAlias) + self.assertEqual(hint.__args__[0], int) + self.assertIs(hint.__unpacked__, True) class CustomVariadic(Generic[*Ts]): pass @@ -7114,6 +7120,24 @@ def add_right(self, node: 'Node[T]' = None): right_hints = get_type_hints(t.add_right, globals(), locals()) self.assertEqual(right_hints['node'], Node[T]) + def test_get_type_hints_preserve_generic_alias_subclasses(self): + # https://github.com/python/cpython/issues/130870 + # A real world example of this is `collections.abc.Callable`. When parameterized, + # the result is a subclass of `types.GenericAlias`. + class MyAlias(types.GenericAlias): + pass + + class MyClass: + def __class_getitem__(cls, args): + return MyAlias(cls, args) + + # Using a forward reference is important, otherwise it works as expected. + # `y` tests that the `GenericAlias` subclass is preserved when stripping `Annotated`. + def func(x: MyClass['int'], y: MyClass[Annotated[int, ...]]): ... + + assert isinstance(get_type_hints(func)['x'], MyAlias) + assert isinstance(get_type_hints(func)['y'], MyAlias) + class GetUtilitiesTestCase(TestCase): def test_get_origin(self): diff --git a/Lib/typing.py b/Lib/typing.py index ed1dd4fc6413a5..4ebf0eb92f589f 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -407,6 +407,17 @@ def inner(*args, **kwds): return decorator +def _rebuild_generic_alias(alias: GenericAlias, args: tuple[object, ...]) -> GenericAlias: + is_unpacked = alias.__unpacked__ + if _should_unflatten_callable_args(alias, args): + t = alias.__origin__[(args[:-1], args[-1])] + else: + t = alias.__origin__[args] + if is_unpacked: + t = Unpack[t] + return t + + def _deprecation_warning_for_no_type_params_passed(funcname: str) -> None: import warnings @@ -454,25 +465,20 @@ def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=f _make_forward_ref(arg) if isinstance(arg, str) else arg for arg in t.__args__ ) - is_unpacked = t.__unpacked__ - if _should_unflatten_callable_args(t, args): - t = t.__origin__[(args[:-1], args[-1])] - else: - t = t.__origin__[args] - if is_unpacked: - t = Unpack[t] + else: + args = t.__args__ ev_args = tuple( _eval_type( a, globalns, localns, type_params, recursive_guard=recursive_guard, format=format, owner=owner, ) - for a in t.__args__ + for a in args ) if ev_args == t.__args__: return t if isinstance(t, GenericAlias): - return GenericAlias(t.__origin__, ev_args) + return _rebuild_generic_alias(t, ev_args) if isinstance(t, Union): return functools.reduce(operator.or_, ev_args) else: @@ -2404,7 +2410,7 @@ def _strip_annotations(t): stripped_args = tuple(_strip_annotations(a) for a in t.__args__) if stripped_args == t.__args__: return t - return GenericAlias(t.__origin__, stripped_args) + return _rebuild_generic_alias(t, stripped_args) if isinstance(t, Union): stripped_args = tuple(_strip_annotations(a) for a in t.__args__) if stripped_args == t.__args__: diff --git a/Misc/NEWS.d/next/Library/2025-06-10-10-22-18.gh-issue-130870.JipqbO.rst b/Misc/NEWS.d/next/Library/2025-06-10-10-22-18.gh-issue-130870.JipqbO.rst new file mode 100644 index 00000000000000..64173285e08417 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-10-10-22-18.gh-issue-130870.JipqbO.rst @@ -0,0 +1,2 @@ +Preserve :class:`types.GenericAlias` subclasses in +:func:`typing.get_type_hints` From 887e5c8646dfc6dd3d64b482c5310a414ac9162b Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sat, 5 Jul 2025 23:24:33 +0900 Subject: [PATCH 781/989] gh-136047: Allow typing._allow_reckless_class_checks to check `_py_abc` (#136115) --- Lib/typing.py | 4 +++- .../Library/2025-07-05-06-59-46.gh-issue-136047.qWvycf.rst | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-05-06-59-46.gh-issue-136047.qWvycf.rst diff --git a/Lib/typing.py b/Lib/typing.py index 4ebf0eb92f589f..27105838a0a064 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1866,7 +1866,9 @@ def _allow_reckless_class_checks(depth=2): The abc and functools modules indiscriminately call isinstance() and issubclass() on the whole MRO of a user class, which may contain protocols. """ - return _caller(depth) in {'abc', 'functools', None} + # gh-136047: When `_abc` module is not available, `_py_abc` is required to + # allow `_py_abc.ABCMeta` fallback. + return _caller(depth) in {'abc', '_py_abc', 'functools', None} _PROTO_ALLOWLIST = { diff --git a/Misc/NEWS.d/next/Library/2025-07-05-06-59-46.gh-issue-136047.qWvycf.rst b/Misc/NEWS.d/next/Library/2025-07-05-06-59-46.gh-issue-136047.qWvycf.rst new file mode 100644 index 00000000000000..1a381860914bc3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-05-06-59-46.gh-issue-136047.qWvycf.rst @@ -0,0 +1,2 @@ +Fix issues with :mod:`typing` when the C implementation of :mod:`abc` is not +available. From 5dac137b9f75c5c1d5096101bcd33d565d0526e4 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Sat, 5 Jul 2025 10:32:28 -0700 Subject: [PATCH 782/989] gh-136315: Fix skipped multithreading test in test_zstd (#136320) Fix skipped test in test_zstd --- Lib/test/test_zstd.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index d4c28aed38ef90..90b2adc9665480 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -62,15 +62,18 @@ TRAINED_DICT = None -SUPPORT_MULTITHREADING = False +# Cannot be deferred to setup as it is used to check whether or not to skip +# tests +try: + SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) +except Exception: + SUPPORT_MULTITHREADING = False C_INT_MIN = -(2**31) C_INT_MAX = (2**31) - 1 def setUpModule(): - global SUPPORT_MULTITHREADING - SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) # uncompressed size 130KB, more than a zstd block. # with a frame epilogue, 4 bytes checksum. global DAT_130K_D From 1953713d0d67a4f54ff75bf8449895a2f08cc750 Mon Sep 17 00:00:00 2001 From: "W. H. Wang" Date: Sun, 6 Jul 2025 06:54:26 +0800 Subject: [PATCH 783/989] gh-136032: Fix `argparse.BooleanOptionalAction` doc (#136133) --- Doc/library/argparse.rst | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index a03d88092dbf1f..f189f6b8fa8953 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -839,23 +839,11 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args(['--version']) PROG 2.0 -Only actions that consume command-line arguments (e.g. ``'store'``, -``'append'`` or ``'extend'``) can be used with positional arguments. - -.. class:: BooleanOptionalAction - - You may also specify an arbitrary action by passing an :class:`Action` subclass or - other object that implements the same interface. The :class:`!BooleanOptionalAction` - is available in :mod:`!argparse` and adds support for boolean actions such as - ``--foo`` and ``--no-foo``:: - - >>> import argparse - >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) - >>> parser.parse_args(['--no-foo']) - Namespace(foo=False) - - .. versionadded:: 3.9 +You may also specify an arbitrary action by passing an :class:`Action` subclass +(e.g. :class:`BooleanOptionalAction`) or other object that implements the same +interface. Only actions that consume command-line arguments (e.g. ``'store'``, +``'append'``, ``'extend'``, or custom actions with non-zero ``nargs``) can be used +with positional arguments. The recommended way to create a custom action is to extend :class:`Action`, overriding the :meth:`!__call__` method and optionally the :meth:`!__init__` and @@ -1429,6 +1417,21 @@ this API may be passed as the ``action`` parameter to and return a string which will be used when printing the usage of the program. If such method is not provided, a sensible default will be used. +.. class:: BooleanOptionalAction + + A subclass of :class:`Action` for handling boolean flags with positive + and negative options. Adding a single argument such as ``--foo`` automatically + creates both ``--foo`` and ``--no-foo`` options, storing ``True`` and ``False`` + respectively:: + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + >>> parser.parse_args(['--no-foo']) + Namespace(foo=False) + + .. versionadded:: 3.9 + The parse_args() method ----------------------- From 06e347b84648f3f8e144e8f70671d610da082b77 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sun, 6 Jul 2025 10:35:30 +0300 Subject: [PATCH 784/989] gh-136285: Improve `pickle` protocol testing in `test_interpreters` (#136286) --- Lib/concurrent/interpreters/__init__.py | 8 ++------ Lib/concurrent/interpreters/_queues.py | 8 ++------ Lib/test/support/channels.py | 8 ++------ Lib/test/test_interpreters/test_api.py | 8 +++++--- Lib/test/test_interpreters/test_channels.py | 16 ++++++++++------ Lib/test/test_interpreters/test_queues.py | 8 +++++--- ...025-07-05-09-45-04.gh-issue-136286.N67Amr.rst | 2 ++ 7 files changed, 28 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-05-09-45-04.gh-issue-136286.N67Amr.rst diff --git a/Lib/concurrent/interpreters/__init__.py b/Lib/concurrent/interpreters/__init__.py index 0fd661249a276c..aa46a2b37a48d5 100644 --- a/Lib/concurrent/interpreters/__init__.py +++ b/Lib/concurrent/interpreters/__init__.py @@ -146,12 +146,8 @@ def __del__(self): self._decref() # for pickling: - def __getnewargs__(self): - return (self._id,) - - # for pickling: - def __getstate__(self): - return None + def __reduce__(self): + return (type(self), (self._id,)) def _decref(self): if not self._ownsref: diff --git a/Lib/concurrent/interpreters/_queues.py b/Lib/concurrent/interpreters/_queues.py index 99987f2f6926b0..9c12b2c8c24664 100644 --- a/Lib/concurrent/interpreters/_queues.py +++ b/Lib/concurrent/interpreters/_queues.py @@ -129,12 +129,8 @@ def __hash__(self): return hash(self._id) # for pickling: - def __getnewargs__(self): - return (self._id,) - - # for pickling: - def __getstate__(self): - return None + def __reduce__(self): + return (type(self), (self._id,)) def _set_unbound(self, op, items=None): assert not hasattr(self, '_unbound') diff --git a/Lib/test/support/channels.py b/Lib/test/support/channels.py index b2de24d9d3e534..fab1797659b312 100644 --- a/Lib/test/support/channels.py +++ b/Lib/test/support/channels.py @@ -105,12 +105,8 @@ def __eq__(self, other): return other._id == self._id # for pickling: - def __getnewargs__(self): - return (int(self._id),) - - # for pickling: - def __getstate__(self): - return None + def __reduce__(self): + return (type(self), (int(self._id),)) @property def id(self): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 0ee4582b5d1568..a34b20beaca7a3 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -412,9 +412,11 @@ def test_equality(self): def test_pickle(self): interp = interpreters.create() - data = pickle.dumps(interp) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, interp) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(interp, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, interp) class TestInterpreterIsRunning(TestBase): diff --git a/Lib/test/test_interpreters/test_channels.py b/Lib/test/test_interpreters/test_channels.py index 109ddf344539ad..52827357078b85 100644 --- a/Lib/test/test_interpreters/test_channels.py +++ b/Lib/test/test_interpreters/test_channels.py @@ -121,9 +121,11 @@ def test_equality(self): def test_pickle(self): ch, _ = channels.create() - data = pickle.dumps(ch) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, ch) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(ch, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, ch) class TestSendChannelAttrs(TestBase): @@ -152,9 +154,11 @@ def test_equality(self): def test_pickle(self): _, ch = channels.create() - data = pickle.dumps(ch) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, ch) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(ch, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, ch) class TestSendRecv(TestBase): diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index cb17340f581b0a..5451c6654acb47 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -188,9 +188,11 @@ def test_equality(self): def test_pickle(self): queue = queues.create() - data = pickle.dumps(queue) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, queue) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(queue, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, queue) class TestQueueOps(TestBase): diff --git a/Misc/NEWS.d/next/Library/2025-07-05-09-45-04.gh-issue-136286.N67Amr.rst b/Misc/NEWS.d/next/Library/2025-07-05-09-45-04.gh-issue-136286.N67Amr.rst new file mode 100644 index 00000000000000..0a0d66ac0b8abf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-05-09-45-04.gh-issue-136286.N67Amr.rst @@ -0,0 +1,2 @@ +Fix pickling failures for protocols 0 and 1 for many objects realted to +subinterpreters. From d22e073d2b49313bbf42d40cbe74afa2b69385df Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 6 Jul 2025 20:48:11 +0530 Subject: [PATCH 785/989] gh-109700: fix memory error handling in `PyDict_SetDefault` (#136338) --- .../2025-07-06-14-53-19.gh-issue-109700.KVNQQi.rst | 1 + Objects/dictobject.c | 1 + 2 files changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-06-14-53-19.gh-issue-109700.KVNQQi.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-06-14-53-19.gh-issue-109700.KVNQQi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-06-14-53-19.gh-issue-109700.KVNQQi.rst new file mode 100644 index 00000000000000..a37f4a51050947 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-06-14-53-19.gh-issue-109700.KVNQQi.rst @@ -0,0 +1 @@ +Fix memory error handling in :c:func:`PyDict_SetDefault`. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 6b7b150f0e2f2c..be62ae5eefd00d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4411,6 +4411,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu if (result) { *result = NULL; } + return -1; } STORE_USED(mp, mp->ma_used + 1); From 77a8bd29da99e7d4fa8e7f07c4063977c2bb14d3 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 7 Jul 2025 03:26:26 +0900 Subject: [PATCH 786/989] gh-109070: Document that get_context in multiprocessing have side effect (#136341) Document that get_context in multiprocessing have side effect --- Doc/library/multiprocessing.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index fc3c1134f97c85..546876bd925db0 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1118,7 +1118,9 @@ Miscellaneous Return a context object which has the same attributes as the :mod:`multiprocessing` module. - If *method* is ``None`` then the default context is returned. + If *method* is ``None`` then the default context is returned. Note that if + the global start method has not been set, this will set it to the + default method. Otherwise *method* should be ``'fork'``, ``'spawn'``, ``'forkserver'``. :exc:`ValueError` is raised if the specified start method is not available. See :ref:`multiprocessing-start-methods`. @@ -1129,10 +1131,10 @@ Miscellaneous Return the name of start method used for starting processes. - If the start method has not been fixed and *allow_none* is false, - then the start method is fixed to the default and the name is - returned. If the start method has not been fixed and *allow_none* - is true then ``None`` is returned. + If the global start method has not been set and *allow_none* is + ``False``, then the start method is set to the default and the name + is returned. If the start method has not been set and *allow_none* is + ``True`` then ``None`` is returned. The return value can be ``'fork'``, ``'spawn'``, ``'forkserver'`` or ``None``. See :ref:`multiprocessing-start-methods`. From c89f76e6c4ca9b0200d5cc8cf0a675a76de50ba8 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sun, 6 Jul 2025 22:11:13 +0300 Subject: [PATCH 787/989] gh-136021: Make `type_params` a required parameter for `typing._eval_type` (#136332) --- Lib/test/test_typing.py | 25 ------------------- Lib/typing.py | 8 +----- ...-07-06-10-18-48.gh-issue-136021.f-FJYT.rst | 3 +++ 3 files changed, 4 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-06-10-18-48.gh-issue-136021.f-FJYT.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index bef6773ad6cb2f..932c7b9c0a526b 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6309,31 +6309,6 @@ def foo(a: 'whatevers') -> {}: class InternalsTests(BaseTestCase): - def test_deprecation_for_no_type_params_passed_to__evaluate(self): - with self.assertWarnsRegex( - DeprecationWarning, - ( - "Failing to pass a value to the 'type_params' parameter " - "of 'typing._eval_type' is deprecated" - ) - ) as cm: - self.assertEqual(typing._eval_type(list["int"], globals(), {}), list[int]) - - self.assertEqual(cm.filename, __file__) - - f = ForwardRef("int") - - with self.assertWarnsRegex( - DeprecationWarning, - ( - "Failing to pass a value to the 'type_params' parameter " - "of 'typing.ForwardRef._evaluate' is deprecated" - ) - ) as cm: - self.assertIs(f._evaluate(globals(), {}, recursive_guard=frozenset()), int) - - self.assertEqual(cm.filename, __file__) - def test_collect_parameters(self): typing = import_helper.import_fresh_module("typing") with self.assertWarnsRegex( diff --git a/Lib/typing.py b/Lib/typing.py index 27105838a0a064..3ef377b954205f 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -437,10 +437,7 @@ def __repr__(self): return '' -_sentinel = _Sentinel() - - -def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=frozenset(), +def _eval_type(t, globalns, localns, type_params, *, recursive_guard=frozenset(), format=None, owner=None): """Evaluate all forward references in the given type t. @@ -448,9 +445,6 @@ def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=f recursive_guard is used to prevent infinite recursion with a recursive ForwardRef. """ - if type_params is _sentinel: - _deprecation_warning_for_no_type_params_passed("typing._eval_type") - type_params = () if isinstance(t, _lazy_annotationlib.ForwardRef): # If the forward_ref has __forward_module__ set, evaluate() infers the globals # from the module, and it will probably pick better than the globals we have here. diff --git a/Misc/NEWS.d/next/Library/2025-07-06-10-18-48.gh-issue-136021.f-FJYT.rst b/Misc/NEWS.d/next/Library/2025-07-06-10-18-48.gh-issue-136021.f-FJYT.rst new file mode 100644 index 00000000000000..39a848c11eb8a1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-06-10-18-48.gh-issue-136021.f-FJYT.rst @@ -0,0 +1,3 @@ +Make ``type_params`` parameter required in :func:`!typing._eval_type` after +a deprecation period for not providing this parameter. Also remove the +:exc:`DeprecationWarning` for the old behavior. From 9312702d2e12c2f58f02bfa02877d0ec790d06bd Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 6 Jul 2025 16:44:20 -0700 Subject: [PATCH 788/989] gh-136316: Make typing.evaluate_forward_ref better at evaluating nested forwardrefs (#136319) --- Lib/test/test_typing.py | 6 ++++++ Lib/test/typinganndata/fwdref_module.py | 6 ++++++ Lib/typing.py | 12 +++++++++--- .../2025-07-05-06-56-16.gh-issue-136316.3zj_Do.rst | 2 ++ 4 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 Lib/test/typinganndata/fwdref_module.py create mode 100644 Misc/NEWS.d/next/Library/2025-07-05-06-56-16.gh-issue-136316.3zj_Do.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 932c7b9c0a526b..b1615bbff383c2 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -7326,6 +7326,12 @@ def test_partial_evaluation(self): list[EqualToForwardRef('A')], ) + def test_with_module(self): + from test.typinganndata import fwdref_module + + typing.evaluate_forward_ref( + fwdref_module.fw,) + class CollectionsAbcTests(BaseTestCase): diff --git a/Lib/test/typinganndata/fwdref_module.py b/Lib/test/typinganndata/fwdref_module.py new file mode 100644 index 00000000000000..7347a7a42455c2 --- /dev/null +++ b/Lib/test/typinganndata/fwdref_module.py @@ -0,0 +1,6 @@ +from typing import ForwardRef + +MyList = list[int] +MyDict = dict[str, 'MyList'] + +fw = ForwardRef('MyDict', module=__name__) diff --git a/Lib/typing.py b/Lib/typing.py index 3ef377b954205f..f1455c273d31ca 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -438,7 +438,7 @@ def __repr__(self): def _eval_type(t, globalns, localns, type_params, *, recursive_guard=frozenset(), - format=None, owner=None): + format=None, owner=None, parent_fwdref=None): """Evaluate all forward references in the given type t. For use of globalns and localns see the docstring for get_type_hints(). @@ -456,7 +456,7 @@ def _eval_type(t, globalns, localns, type_params, *, recursive_guard=frozenset() if isinstance(t, (_GenericAlias, GenericAlias, Union)): if isinstance(t, GenericAlias): args = tuple( - _make_forward_ref(arg) if isinstance(arg, str) else arg + _make_forward_ref(arg, parent_fwdref=parent_fwdref) if isinstance(arg, str) else arg for arg in t.__args__ ) else: @@ -936,7 +936,12 @@ def run(arg: Child | Unrelated): return _GenericAlias(self, (item,)) -def _make_forward_ref(code, **kwargs): +def _make_forward_ref(code, *, parent_fwdref=None, **kwargs): + if parent_fwdref is not None: + if parent_fwdref.__forward_module__ is not None: + kwargs['module'] = parent_fwdref.__forward_module__ + if parent_fwdref.__owner__ is not None: + kwargs['owner'] = parent_fwdref.__owner__ forward_ref = _lazy_annotationlib.ForwardRef(code, **kwargs) # For compatibility, eagerly compile the forwardref's code. forward_ref.__forward_code__ @@ -1001,6 +1006,7 @@ def evaluate_forward_ref( recursive_guard=_recursive_guard | {forward_ref.__forward_arg__}, format=format, owner=owner, + parent_fwdref=forward_ref, ) diff --git a/Misc/NEWS.d/next/Library/2025-07-05-06-56-16.gh-issue-136316.3zj_Do.rst b/Misc/NEWS.d/next/Library/2025-07-05-06-56-16.gh-issue-136316.3zj_Do.rst new file mode 100644 index 00000000000000..dd5cecdf3a1209 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-05-06-56-16.gh-issue-136316.3zj_Do.rst @@ -0,0 +1,2 @@ +Improve support for evaluating nested forward references in +:func:`typing.evaluate_forward_ref`. From 85b817da94cf911a964d42e81a57e1de9ab71ef9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Jul 2025 09:29:00 +0300 Subject: [PATCH 789/989] gh-136289: Fix test_sqlite3 on platforms with strict UTF-8 filesystem (GH-136326) --- Lib/test/test_sqlite3/test_dbapi.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py index 291e035625397d..3602726437d8cf 100644 --- a/Lib/test/test_sqlite3/test_dbapi.py +++ b/Lib/test/test_sqlite3/test_dbapi.py @@ -31,8 +31,7 @@ import warnings from test.support import ( - SHORT_TIMEOUT, check_disallow_instantiation, requires_subprocess, - is_apple, is_emscripten, is_wasi + SHORT_TIMEOUT, check_disallow_instantiation, requires_subprocess ) from test.support import gc_collect from test.support import threading_helper, import_helper @@ -641,14 +640,21 @@ def test_open_with_path_like_object(self): self.assertTrue(os.path.exists(path)) cx.execute(self._sql) + def get_undecodable_path(self): + path = TESTFN_UNDECODABLE + if not path: + self.skipTest("only works if there are undecodable paths") + try: + open(path, 'wb').close() + except OSError: + self.skipTest(f"can't create file with undecodable path {path!r}") + unlink(path) + return path + @unittest.skipIf(sys.platform == "win32", "skipped on Windows") - @unittest.skipIf(is_apple, "skipped on Apple platforms") - @unittest.skipIf(is_emscripten or is_wasi, "not supported on Emscripten/WASI") - @unittest.skipUnless(TESTFN_UNDECODABLE, "only works if there are undecodable paths") def test_open_with_undecodable_path(self): - path = TESTFN_UNDECODABLE + path = self.get_undecodable_path() self.addCleanup(unlink, path) - self.assertFalse(os.path.exists(path)) with contextlib.closing(sqlite.connect(path)) as cx: self.assertTrue(os.path.exists(path)) cx.execute(self._sql) @@ -688,14 +694,10 @@ def test_open_uri_readonly(self): cx.execute(self._sql) @unittest.skipIf(sys.platform == "win32", "skipped on Windows") - @unittest.skipIf(is_apple, "skipped on Apple platforms") - @unittest.skipIf(is_emscripten or is_wasi, "not supported on Emscripten/WASI") - @unittest.skipUnless(TESTFN_UNDECODABLE, "only works if there are undecodable paths") def test_open_undecodable_uri(self): - path = TESTFN_UNDECODABLE + path = self.get_undecodable_path() self.addCleanup(unlink, path) uri = "file:" + urllib.parse.quote(path) - self.assertFalse(os.path.exists(path)) with contextlib.closing(sqlite.connect(uri, uri=True)) as cx: self.assertTrue(os.path.exists(path)) cx.execute(self._sql) From 0c3e3da19570424649c33c0c2c29dc12541935e7 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 7 Jul 2025 12:45:22 +0530 Subject: [PATCH 790/989] gh-109700: fix interpreter finalization while handling memory error (#136342) --- Python/pylifecycle.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 724fda63511282..00e8d030765560 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1702,8 +1702,10 @@ finalize_modules(PyThreadState *tstate) #endif // Stop watching __builtin__ modifications - PyDict_Unwatch(0, interp->builtins); - + if (PyDict_Unwatch(0, interp->builtins) < 0) { + // might happen if interp is cleared before watching the __builtin__ + PyErr_Clear(); + } PyObject *modules = _PyImport_GetModules(interp); if (modules == NULL) { // Already done @@ -2377,15 +2379,13 @@ new_interpreter(PyThreadState **tstate_p, error: *tstate_p = NULL; if (tstate != NULL) { - PyThreadState_Clear(tstate); - _PyThreadState_Detach(tstate); - PyThreadState_Delete(tstate); + Py_EndInterpreter(tstate); + } else { + PyInterpreterState_Delete(interp); } if (save_tstate != NULL) { _PyThreadState_Attach(save_tstate); } - PyInterpreterState_Delete(interp); - return status; } From 90a5b4402bc6b8f44a9fe9a5299721b1887b50f8 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 7 Jul 2025 11:16:27 +0300 Subject: [PATCH 791/989] gh-87790: support thousands separators for formatting fractional part of Decimal (#132202) Co-authored-by: Serhiy Storchaka --- Lib/_pydecimal.py | 14 +++++++++++++- Lib/test/test_decimal.py | 13 +++++++++++++ .../2025-04-07-09-53-54.gh-issue-87790.6nj3zQ.rst | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-07-09-53-54.gh-issue-87790.6nj3zQ.rst diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 781b38ec26ba33..9b8e42a2342536 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -6122,7 +6122,11 @@ def _convert_for_comparison(self, other, equality_op=False): (?P0)? (?P\d+)? (?P[,_])? -(?:\.(?P\d+))? +(?:\. + (?=[\d,_]) # lookahead for digit or separator + (?P\d+)? + (?P[,_])? +)? (?P[eEfFgGn%])? \z """, re.VERBOSE|re.DOTALL) @@ -6215,6 +6219,9 @@ def _parse_format_specifier(format_spec, _localeconv=None): format_dict['grouping'] = [3, 0] format_dict['decimal_point'] = '.' + if format_dict['frac_separators'] is None: + format_dict['frac_separators'] = '' + return format_dict def _format_align(sign, body, spec): @@ -6334,6 +6341,11 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): sign = _format_sign(is_negative, spec) + frac_sep = spec['frac_separators'] + if fracpart and frac_sep: + fracpart = frac_sep.join(fracpart[pos:pos + 3] + for pos in range(0, len(fracpart), 3)) + if fracpart or spec['alt']: fracpart = spec['decimal_point'] + fracpart diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index ef64b878805d77..08a8f4c3b36bd6 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -1089,6 +1089,15 @@ def test_formatting(self): ('07_', '1234.56', '1_234.56'), ('_', '1.23456789', '1.23456789'), ('_%', '123.456789', '12_345.6789%'), + # and now for something completely different... + ('.,', '1.23456789', '1.234,567,89'), + ('._', '1.23456789', '1.234_567_89'), + ('.6_f', '12345.23456789', '12345.234_568'), + (',._%', '123.456789', '12,345.678_9%'), + (',._e', '123456', '1.234_56e+5'), + (',.4_e', '123456', '1.234_6e+5'), + (',.3_e', '123456', '1.235e+5'), + (',._E', '123456', '1.234_56E+5'), # negative zero: default behavior ('.1f', '-0', '-0.0'), @@ -1162,6 +1171,10 @@ def test_formatting(self): # bytes format argument self.assertRaises(TypeError, Decimal(1).__format__, b'-020') + # precision or fractional part separator should follow after dot + self.assertRaises(ValueError, format, Decimal(1), '.f') + self.assertRaises(ValueError, format, Decimal(1), '._6f') + def test_negative_zero_format_directed_rounding(self): with self.decimal.localcontext() as ctx: ctx.rounding = ROUND_CEILING diff --git a/Misc/NEWS.d/next/Library/2025-04-07-09-53-54.gh-issue-87790.6nj3zQ.rst b/Misc/NEWS.d/next/Library/2025-04-07-09-53-54.gh-issue-87790.6nj3zQ.rst new file mode 100644 index 00000000000000..cf80c71271bbd1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-07-09-53-54.gh-issue-87790.6nj3zQ.rst @@ -0,0 +1,2 @@ +Support underscore and comma as thousands separators in the fractional part +for :class:`~decimal.Decimal`'s formatting. Patch by Sergey B Kirpichev. From 3e849d75f400569cbf3c29c356061c788284b71e Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 7 Jul 2025 11:16:31 +0300 Subject: [PATCH 792/989] gh-87790: support thousands separators for formatting fractional part of Fraction (#132204) --- Lib/fractions.py | 10 +++++++++- Lib/test/test_fractions.py | 13 +++++++++++++ .../2025-04-07-10-20-16.gh-issue-87790.X2SjJe.rst | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-07-10-20-16.gh-issue-87790.X2SjJe.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index a8c670685228d3..c1b12e7a1c091c 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -170,7 +170,11 @@ def _round_to_figures(n, d, figures): (?P0(?=[0-9]))? (?P[0-9]+)? (?P[,_])? - (?:\.(?P[0-9]+))? + (?:\. + (?=[,_0-9]) # lookahead for digit or separator + (?P[0-9]+)? + (?P[,_])? + )? (?P[eEfFgG%]) """, re.DOTALL | re.VERBOSE).fullmatch @@ -499,6 +503,7 @@ def _format_float_style(self, match): minimumwidth = int(match["minimumwidth"] or "0") thousands_sep = match["thousands_sep"] precision = int(match["precision"] or "6") + frac_sep = match["frac_separators"] or "" presentation_type = match["presentation_type"] trim_zeros = presentation_type in "gG" and not alternate_form trim_point = not alternate_form @@ -555,6 +560,9 @@ def _format_float_style(self, match): if trim_zeros: frac_part = frac_part.rstrip("0") separator = "" if trim_point and not frac_part else "." + if frac_sep: + frac_part = frac_sep.join(frac_part[pos:pos + 3] + for pos in range(0, len(frac_part), 3)) trailing = separator + frac_part + suffix # Do zero padding if required. diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 1875a2f529c957..cf42b86358dbca 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1322,6 +1322,8 @@ def test_format_e_presentation_type(self): # Thousands separators (F('1234567.123456'), ',.5e', '1.23457e+06'), (F('123.123456'), '012_.2e', '0_001.23e+02'), + # Thousands separators for fractional part (or for integral too) + (F('1234567.123456'), '.5_e', '1.234_57e+06'), # z flag is legal, but never makes a difference to the output (F(-1, 7**100), 'z.6e', '-3.091690e-85'), ] @@ -1447,6 +1449,12 @@ def test_format_f_presentation_type(self): (F('1234567'), ',.2f', '1,234,567.00'), (F('12345678'), ',.2f', '12,345,678.00'), (F('12345678'), ',f', '12,345,678.000000'), + # Thousands separators for fractional part (or for integral too) + (F('123456.789123123'), '._f', '123456.789_123'), + (F('123456.789123123'), '.7_f', '123456.789_123_1'), + (F('123456.789123123'), '.9_f', '123456.789_123_123'), + (F('123456.789123123'), '.,f', '123456.789,123'), + (F('123456.789123123'), '_.,f', '123_456.789,123'), # Underscore as thousands separator (F(2, 3), '_.2f', '0.67'), (F(2, 3), '_.7f', '0.6666667'), @@ -1620,6 +1628,11 @@ def test_invalid_formats(self): '.f', '.g', '.%', + # Thousands separators before precision + '._6e', + '._6f', + '._6g', + '._6%', # Z instead of z for negative zero suppression 'Z.2f' # z flag not supported for general formatting diff --git a/Misc/NEWS.d/next/Library/2025-04-07-10-20-16.gh-issue-87790.X2SjJe.rst b/Misc/NEWS.d/next/Library/2025-04-07-10-20-16.gh-issue-87790.X2SjJe.rst new file mode 100644 index 00000000000000..be2a30d69cab45 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-07-10-20-16.gh-issue-87790.X2SjJe.rst @@ -0,0 +1,2 @@ +Support underscore and comma as thousands separators in the fractional part +for :class:`~fractions.Fraction`'s formatting. Patch by Sergey B Kirpichev. From d05423a90ce0ee9ad5207dce3dd06ab2397f3d6e Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Mon, 7 Jul 2025 10:01:03 +0100 Subject: [PATCH 793/989] gh-94503: Update logging cookbook with an example of uniformly handling newlines in output. (GH-136217) --- Doc/howto/logging-cookbook.rst | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 7d64a02358adb3..ae2697fbce30ad 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -4078,6 +4078,68 @@ lines. With this approach, you get better output: WARNING:demo: 1/0 WARNING:demo:ZeroDivisionError: division by zero +How to uniformly handle newlines in logging output +-------------------------------------------------- + +Usually, messages that are logged (say to console or file) consist of a single +line of text. However, sometimes there is a need to handle messages with +multiple lines - whether because a logging format string contains newlines, or +logged data contains newlines. If you want to handle such messages uniformly, so +that each line in the logged message appears uniformly formatted as if it was +logged separately, you can do this using a handler mixin, as in the following +snippet: + +.. code-block:: python + + # Assume this is in a module mymixins.py + import copy + + class MultilineMixin: + def emit(self, record): + s = record.getMessage() + if '\n' not in s: + super().emit(record) + else: + lines = s.splitlines() + rec = copy.copy(record) + rec.args = None + for line in lines: + rec.msg = line + super().emit(rec) + +You can use the mixin as in the following script: + +.. code-block:: python + + import logging + + from mymixins import MultilineMixin + + logger = logging.getLogger(__name__) + + class StreamHandler(MultilineMixin, logging.StreamHandler): + pass + + if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)-9s %(message)s', + handlers = [StreamHandler()]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +The script, when run, prints something like: + +.. code-block:: text + + 2025-07-02 13:54:47,234 DEBUG Single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me once ... + 2025-07-02 13:54:47,234 DEBUG Another single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me ... + 2025-07-02 13:54:47,234 DEBUG can't get fooled again + .. patterns-to-avoid: From cb99d992774b67761441e122965ed056bac09241 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 7 Jul 2025 11:03:07 +0200 Subject: [PATCH 794/989] gh-127502: Remove XML vulnerability table (GH-135294) * Remove the table * Replace warnings with notes Latest releases of Python 3.9-3.15 include expat 2.7.1 which is not vulnerable. expat 2.6.0 was released in February 2024. --- Doc/library/pyexpat.rst | 7 ++- Doc/library/security_warnings.rst | 2 +- Doc/library/xml.dom.minidom.rst | 7 ++- Doc/library/xml.dom.pulldom.rst | 7 ++- Doc/library/xml.etree.elementtree.rst | 7 ++- Doc/library/xml.rst | 76 +++++---------------------- Doc/library/xml.sax.rst | 7 ++- Doc/library/xmlrpc.client.rst | 4 +- Doc/library/xmlrpc.server.rst | 4 +- 9 files changed, 32 insertions(+), 89 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 2d57cff10a9278..5506ac828e5abe 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -16,11 +16,10 @@ references to these attributes should be marked using the :member: role. -.. warning:: +.. note:: - The :mod:`pyexpat` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. index:: single: Expat diff --git a/Doc/library/security_warnings.rst b/Doc/library/security_warnings.rst index a573c98f73eb0a..70c359cc1c0fc3 100644 --- a/Doc/library/security_warnings.rst +++ b/Doc/library/security_warnings.rst @@ -28,7 +28,7 @@ The following modules have specific security considerations: ` * :mod:`tempfile`: :ref:`mktemp is deprecated due to vulnerability to race conditions ` -* :mod:`xml`: :ref:`XML vulnerabilities ` +* :mod:`xml`: :ref:`XML security ` * :mod:`zipfile`: :ref:`maliciously prepared .zip files can cause disk volume exhaustion ` diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 00a18751207e7a..9ffedf7366a7b8 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -19,11 +19,10 @@ not already proficient with the DOM should consider using the :mod:`xml.etree.ElementTree` module for their XML processing instead. -.. warning:: +.. note:: - The :mod:`xml.dom.minidom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. DOM applications typically start by parsing some XML into a DOM. With diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index fd96765cbe3c96..8bceeecd46393e 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -19,11 +19,10 @@ responsible for explicitly pulling events from the stream, looping over those events until either processing is finished or an error condition occurs. -.. warning:: +.. note:: - The :mod:`xml.dom.pulldom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 1daf6628013bf0..00075ac2a23e6b 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -20,11 +20,10 @@ for parsing and creating XML data. The :mod:`!xml.etree.cElementTree` module is deprecated. -.. warning:: +.. note:: - The :mod:`xml.etree.ElementTree` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. Tutorial -------- diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index d495995398959d..28465219a1ac18 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -15,12 +15,10 @@ XML Processing Modules Python's interfaces for processing XML are grouped in the ``xml`` package. -.. warning:: +.. note:: - The XML modules are not secure against erroneous or maliciously - constructed data. If you need to parse untrusted or - unauthenticated data see the :ref:`xml-vulnerabilities` and - :ref:`defusedxml-package` sections. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. It is important to note that modules in the :mod:`xml` package require that there be at least one SAX-compliant XML parser available. The Expat parser is @@ -47,46 +45,22 @@ The XML handling submodules are: * :mod:`xml.parsers.expat`: the Expat parser binding +.. _xml-security: .. _xml-vulnerabilities: -XML vulnerabilities -------------------- +XML security +------------ -The XML processing modules are not secure against maliciously constructed data. An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls. -The following table gives an overview of the known attacks and whether -the various modules are vulnerable to them. - -========================= ================== ================== ================== ================== ================== -kind sax etree minidom pulldom xmlrpc -========================= ================== ================== ================== ================== ================== -billion laughs **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -quadratic blowup **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -external entity expansion Safe (5) Safe (2) Safe (3) Safe (5) Safe (4) -`DTD`_ retrieval Safe (5) Safe Safe Safe (5) Safe -decompression bomb Safe Safe Safe Safe **Vulnerable** -large tokens **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) -========================= ================== ================== ================== ================== ================== - -1. Expat 2.4.1 and newer is not vulnerable to the "billion laughs" and - "quadratic blowup" vulnerabilities. Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. -2. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a - :exc:`~xml.etree.ElementTree.ParseError` when an entity occurs. -3. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns - the unexpanded entity verbatim. -4. :mod:`xmlrpc.client` doesn't expand external entities and omits them. -5. Since Python 3.7.1, external general entities are no longer processed by - default. -6. Expat 2.6.0 and newer is not vulnerable to denial of service - through quadratic runtime caused by parsing large tokens. - Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. +Expat versions lower that 2.6.0 may be vulnerable to "billion laughs", +"quadratic blowup" and "large tokens". Python may be vulnerable if it uses such +older versions of Expat as a system-provided library. +Check :const:`!pyexpat.EXPAT_VERSION`. + +:mod:`xmlrpc` is **vulnerable** to the "decompression bomb" attack. billion laughs / exponential entity expansion @@ -103,16 +77,6 @@ quadratic blowup entity expansion efficient as the exponential case but it avoids triggering parser countermeasures that forbid deeply nested entities. -external entity expansion - Entity declarations can contain more than just text for replacement. They can - also point to external resources or local files. The XML - parser accesses the resource and embeds the content into the XML document. - -`DTD`_ retrieval - Some XML libraries like Python's :mod:`xml.dom.pulldom` retrieve document type - definitions from remote or local locations. The feature has similar - implications as the external entity expansion issue. - decompression bomb Decompression bombs (aka `ZIP bomb`_) apply to all XML libraries that can parse compressed XML streams such as gzipped HTTP streams or @@ -126,21 +90,5 @@ large tokens be used to cause denial of service in the application parsing XML. The issue is known as :cve:`2023-52425`. -The documentation for :pypi:`defusedxml` on PyPI has further information about -all known attack vectors with examples and references. - -.. _defusedxml-package: - -The :mod:`!defusedxml` Package ------------------------------- - -:pypi:`defusedxml` is a pure Python package with modified subclasses of all stdlib -XML parsers that prevent any potentially malicious operation. Use of this -package is recommended for any server code that parses untrusted XML data. The -package also ships with example exploits and extended documentation on more -XML exploits such as XPath injection. - - .. _Billion Laughs: https://en.wikipedia.org/wiki/Billion_laughs .. _ZIP bomb: https://en.wikipedia.org/wiki/Zip_bomb -.. _DTD: https://en.wikipedia.org/wiki/Document_type_definition diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index c60e9e505f7544..5fa92645a440ce 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -18,11 +18,10 @@ SAX exceptions and the convenience functions which will be most used by users of the SAX API. -.. warning:: +.. note:: - The :mod:`xml.sax` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index 654154cb43d6e5..547cb50be78a78 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -24,8 +24,8 @@ between conformable Python objects and XML on the wire. .. warning:: The :mod:`xmlrpc.client` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. versionchanged:: 3.5 diff --git a/Doc/library/xmlrpc.server.rst b/Doc/library/xmlrpc.server.rst index 06169c7eca8b0c..2a8f6f8d5fc0de 100644 --- a/Doc/library/xmlrpc.server.rst +++ b/Doc/library/xmlrpc.server.rst @@ -20,8 +20,8 @@ servers written in Python. Servers can either be free standing, using .. warning:: The :mod:`xmlrpc.server` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. include:: ../includes/wasm-notavail.rst From 2468aafe984fdf923811ef0c6969e3d6c1b92a82 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 7 Jul 2025 12:31:13 +0200 Subject: [PATCH 795/989] gh-135755: Document __future__.* and CO_* as proper Sphinx objects (GH-135980) * Turn the __future__ table to list-table. This'll make it easier to add entries that need longer markup * Semantic markup for __future__ feature descriptions. * Document CO_* C macros. --- Doc/c-api/code.rst | 65 +++++++++++++++++++++++++++++++ Doc/c-api/veryhigh.rst | 8 ++-- Doc/library/__future__.rst | 78 ++++++++++++++++++++++---------------- 3 files changed, 115 insertions(+), 36 deletions(-) diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 42594f063b0709..717b0da8f87c7f 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -211,6 +211,71 @@ bound into a function. .. versionadded:: 3.12 +.. _c_codeobject_flags: + +Code Object Flags +----------------- + +Code objects contain a bit-field of flags, which can be retrieved as the +:attr:`~codeobject.co_flags` Python attribute (for example using +:c:func:`PyObject_GetAttrString`), and set using a *flags* argument to +:c:func:`PyUnstable_Code_New` and similar functions. + +Flags whose names start with ``CO_FUTURE_`` correspond to features normally +selectable by :ref:`future statements `. These flags can be used in +:c:member:`PyCompilerFlags.cf_flags`. +Note that many ``CO_FUTURE_`` flags are mandatory in current versions of +Python, and setting them has no effect. + +The following flags are available. +For their meaning, see the linked documentation of their Python equivalents. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Flag + * Meaning + * * .. c:macro:: CO_OPTIMIZED + * :py:data:`inspect.CO_OPTIMIZED` + * * .. c:macro:: CO_NEWLOCALS + * :py:data:`inspect.CO_NEWLOCALS` + * * .. c:macro:: CO_VARARGS + * :py:data:`inspect.CO_VARARGS` + * * .. c:macro:: CO_VARKEYWORDS + * :py:data:`inspect.CO_VARKEYWORDS` + * * .. c:macro:: CO_NESTED + * :py:data:`inspect.CO_NESTED` + * * .. c:macro:: CO_GENERATOR + * :py:data:`inspect.CO_GENERATOR` + * * .. c:macro:: CO_COROUTINE + * :py:data:`inspect.CO_COROUTINE` + * * .. c:macro:: CO_ITERABLE_COROUTINE + * :py:data:`inspect.CO_ITERABLE_COROUTINE` + * * .. c:macro:: CO_ASYNC_GENERATOR + * :py:data:`inspect.CO_ASYNC_GENERATOR` + * * .. c:macro:: CO_HAS_DOCSTRING + * :py:data:`inspect.CO_HAS_DOCSTRING` + * * .. c:macro:: CO_METHOD + * :py:data:`inspect.CO_METHOD` + + * * .. c:macro:: CO_FUTURE_DIVISION + * no effect (:py:data:`__future__.division`) + * * .. c:macro:: CO_FUTURE_ABSOLUTE_IMPORT + * no effect (:py:data:`__future__.absolute_import`) + * * .. c:macro:: CO_FUTURE_WITH_STATEMENT + * no effect (:py:data:`__future__.with_statement`) + * * .. c:macro:: CO_FUTURE_PRINT_FUNCTION + * no effect (:py:data:`__future__.print_function`) + * * .. c:macro:: CO_FUTURE_UNICODE_LITERALS + * no effect (:py:data:`__future__.unicode_literals`) + * * .. c:macro:: CO_FUTURE_GENERATOR_STOP + * no effect (:py:data:`__future__.generator_stop`) + * * .. c:macro:: CO_FUTURE_ANNOTATIONS + * :py:data:`__future__.annotations` + + Extra information ----------------- diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index 1ef4181d52eb10..fb07fec7effce8 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -361,7 +361,7 @@ the same library that the Python runtime is using. :py:mod:`!ast` Python module, which exports these constants under the same names. - .. c:var:: int CO_FUTURE_DIVISION - - This bit can be set in *flags* to cause division operator ``/`` to be - interpreted as "true division" according to :pep:`238`. + The "``PyCF``" flags above can be combined with "``CO_FUTURE``" flags such + as :c:macro:`CO_FUTURE_ANNOTATIONS` to enable features normally + selectable using :ref:`future statements `. + See :ref:`c_codeobject_flags` for a complete list. diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index 4f3b663006fb28..5d916b30112d3c 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -37,38 +37,52 @@ No feature description will ever be deleted from :mod:`__future__`. Since its introduction in Python 2.1 the following features have found their way into the language using this mechanism: -+------------------+-------------+--------------+---------------------------------------------+ -| feature | optional in | mandatory in | effect | -+==================+=============+==============+=============================================+ -| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | -| | | | *Statically Nested Scopes* | -+------------------+-------------+--------------+---------------------------------------------+ -| generators | 2.2.0a1 | 2.3 | :pep:`255`: | -| | | | *Simple Generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| division | 2.2.0a2 | 3.0 | :pep:`238`: | -| | | | *Changing the Division Operator* | -+------------------+-------------+--------------+---------------------------------------------+ -| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | -| | | | *Imports: Multi-Line and Absolute/Relative* | -+------------------+-------------+--------------+---------------------------------------------+ -| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | -| | | | *The "with" Statement* | -+------------------+-------------+--------------+---------------------------------------------+ -| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | -| | | | *Make print a function* | -+------------------+-------------+--------------+---------------------------------------------+ -| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | -| | | | *Bytes literals in Python 3000* | -+------------------+-------------+--------------+---------------------------------------------+ -| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | -| | | | *StopIteration handling inside generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| annotations | 3.7.0b1 | Never [1]_ | :pep:`563`: | -| | | | *Postponed evaluation of annotations*, | -| | | | :pep:`649`: *Deferred evaluation of | -| | | | annotations using descriptors* | -+------------------+-------------+--------------+---------------------------------------------+ + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * feature + * optional in + * mandatory in + * effect + * * .. data:: nested_scopes + * 2.1.0b1 + * 2.2 + * :pep:`227`: *Statically Nested Scopes* + * * .. data:: generators + * 2.2.0a1 + * 2.3 + * :pep:`255`: *Simple Generators* + * * .. data:: division + * 2.2.0a2 + * 3.0 + * :pep:`238`: *Changing the Division Operator* + * * .. data:: absolute_import + * 2.5.0a1 + * 3.0 + * :pep:`328`: *Imports: Multi-Line and Absolute/Relative* + * * .. data:: with_statement + * 2.5.0a1 + * 2.6 + * :pep:`343`: *The “with” Statement* + * * .. data:: print_function + * 2.6.0a2 + * 3.0 + * :pep:`3105`: *Make print a function* + * * .. data:: unicode_literals + * 2.6.0a2 + * 3.0 + * :pep:`3112`: *Bytes literals in Python 3000* + * * .. data:: generator_stop + * 3.5.0b1 + * 3.7 + * :pep:`479`: *StopIteration handling inside generators* + * * .. data:: annotations + * 3.7.0b1 + * Never [1]_ + * :pep:`563`: *Postponed evaluation of annotations*, + :pep:`649`: *Deferred evaluation of annotations using descriptors* .. XXX Adding a new entry? Remember to update simple_stmts.rst, too. From 0a332215947be01a54026e40803c65591dd58279 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 7 Jul 2025 13:41:11 +0300 Subject: [PATCH 796/989] gh-101100: Fix sphinx warnings in `whatsnew/3.9` (#136163) --- Doc/library/random.rst | 5 +++++ Doc/tools/.nitignore | 1 - Doc/whatsnew/3.9.rst | 10 +++++----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index ef0cfb0e76cef6..b1120b3a4d8eb4 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -447,6 +447,11 @@ Alternative Generator Override this method in subclasses to customise the :meth:`~random.getrandbits` behaviour of :class:`!Random` instances. + .. method:: Random.randbytes(n) + + Override this method in subclasses to customise the + :meth:`~random.randbytes` behaviour of :class:`!Random` instances. + .. class:: SystemRandom([seed]) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index e3bcb968128d96..88f22db56f9684 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -73,6 +73,5 @@ Doc/whatsnew/3.5.rst Doc/whatsnew/3.6.rst Doc/whatsnew/3.7.rst Doc/whatsnew/3.8.rst -Doc/whatsnew/3.9.rst Doc/whatsnew/3.10.rst Doc/whatsnew/3.11.rst diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 896e8f4a489649..7fd9e6ac66e6c8 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -423,8 +423,8 @@ digests. It skips MD5 on platforms that block MD5 digest. fcntl ----- -Added constants :const:`~fcntl.F_OFD_GETLK`, :const:`~fcntl.F_OFD_SETLK` -and :const:`~fcntl.F_OFD_SETLKW`. +Added constants :const:`!fcntl.F_OFD_GETLK`, :const:`!fcntl.F_OFD_SETLK` +and :const:`!fcntl.F_OFD_SETLKW`. (Contributed by Donghee Na in :issue:`38602`.) ftplib @@ -644,7 +644,7 @@ attribute. random ------ -Added a new :attr:`random.Random.randbytes` method: generate random bytes. +Added a new :meth:`random.Random.randbytes` method: generate random bytes. (Contributed by Victor Stinner in :issue:`40286`.) signal @@ -776,7 +776,7 @@ Optimizations :pep:`590` vectorcall protocol. (Contributed by Donghee Na, Mark Shannon, Jeroen Demeyer and Petr Viktorin in :issue:`37207`.) -* Optimized :func:`~set.difference_update` for the case when the other set +* Optimized :meth:`!set.difference_update` for the case when the other set is much larger than the base set. (Suggested by Evgeny Kapun with code contributed by Michele Orrù in :issue:`8425`.) @@ -1139,7 +1139,7 @@ Changes in the Python API (Contributed by Christian Heimes in :issue:`36384`). * :func:`codecs.lookup` now normalizes the encoding name the same way as - :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also + :func:`!encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case. For example, ``"latex+latin1"`` encoding name is now normalized to ``"latex_latin1"``. (Contributed by Jordon Xu in :issue:`37751`.) From 9aac5a3d44d7191001660aed3a383b062ee4b29b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 7 Jul 2025 12:56:29 +0200 Subject: [PATCH 797/989] gh-131591: Document Py_REMOTE_DEBUG (GH-135929) --- Doc/using/configure.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index a26d48d7d0a345..e5fe3c72b1b26e 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -686,6 +686,13 @@ also be used to improve performance. not compiled. This includes both the functionality to schedule code to be executed and the functionality to receive code to be executed. + .. c:macro:: Py_REMOTE_DEBUG + + This macro is defined by default, unless Python is configured with + :option:`--without-remote-debug`. + + Note that even if the macro is defined, remote debugging may not be + available (for example, on an incompatible platform). .. versionadded:: 3.14 From 11f074b243756bca0db5a7d35dd84f00879de616 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 7 Jul 2025 07:22:04 -0400 Subject: [PATCH 798/989] gh-86682: Add versionadded for sys._getframemodulename (#136325) add versionadded for sys._getframemodulename --- Doc/library/sys.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 1626a89a0731bf..05bc7cfb9dc089 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -953,6 +953,8 @@ always available. Unless explicitly noted otherwise, all variables are read-only This function should be used for internal and specialized purposes only. It is not guaranteed to exist in all implementations of Python. + .. versionadded:: 3.12 + .. function:: getobjects(limit[, type]) From 73e1207a4ebdb3b43d597cd6c288dae6d7d1dbdb Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 7 Jul 2025 14:05:17 +0200 Subject: [PATCH 799/989] gh-135913: Document ob_refcnt, ob_type, ob_size (GH-135914) * gh-135913: Document ob_refcnt, ob_type, ob_size In `typeobj.rst`, instead of `:c:member:` it would be better to use `.. c:member::` with a `:no-index:` option, see: See ref. https://www.sphinx-doc.org/en/master/usage/domains/index.html#basic-markup However, `c:member` currently does not support `:no-index:`. --- Doc/c-api/structures.rst | 75 +++++++++++++++++++++++++++++++--------- Doc/c-api/typeobj.rst | 11 +++--- 2 files changed, 64 insertions(+), 22 deletions(-) diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 987bc167c68d6c..58dd915e04f619 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -28,18 +28,52 @@ under :ref:`reference counting `. object. In a normal "release" build, it contains only the object's reference count and a pointer to the corresponding type object. Nothing is actually declared to be a :c:type:`PyObject`, but every pointer - to a Python object can be cast to a :c:expr:`PyObject*`. Access to the - members must be done by using the macros :c:macro:`Py_REFCNT` and - :c:macro:`Py_TYPE`. + to a Python object can be cast to a :c:expr:`PyObject*`. + + The members must not be accessed directly; instead use macros such as + :c:macro:`Py_REFCNT` and :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_refcnt + + The object's reference count, as returned by :c:macro:`Py_REFCNT`. + Do not use this field directly; instead use functions and macros such as + :c:macro:`!Py_REFCNT`, :c:func:`Py_INCREF` and :c:func:`Py_DecRef`. + + The field type may be different from ``Py_ssize_t``, depending on + build configuration and platform. + + .. c:member:: PyTypeObject* ob_type + + The object's type. + Do not use this field directly; use :c:macro:`Py_TYPE` and + :c:func:`Py_SET_TYPE` instead. .. c:type:: PyVarObject - This is an extension of :c:type:`PyObject` that adds the :c:member:`~PyVarObject.ob_size` - field. This is only used for objects that have some notion of *length*. - This type does not often appear in the Python/C API. - Access to the members must be done by using the macros - :c:macro:`Py_REFCNT`, :c:macro:`Py_TYPE`, and :c:macro:`Py_SIZE`. + An extension of :c:type:`PyObject` that adds the + :c:member:`~PyVarObject.ob_size` field. + This is intended for objects that have some notion of *length*. + + As with :c:type:`!PyObject`, the members must not be accessed directly; + instead use macros such as :c:macro:`Py_SIZE`, :c:macro:`Py_REFCNT` and + :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_size + + A size field, whose contents should be considered an object's internal + implementation detail. + + Do not use this field directly; use :c:macro:`Py_SIZE` instead. + + Object creation functions such as :c:func:`PyObject_NewVar` will + generally set this field to the requested size (number of items). + After creation, arbitrary values can be stored in :c:member:`!ob_size` + using :c:macro:`Py_SET_SIZE`. + + To get an object's publicly exposed length, as returned by + the Python function :py:func:`len`, use :c:func:`PyObject_Length` + instead. .. c:macro:: PyObject_HEAD @@ -103,9 +137,8 @@ under :ref:`reference counting `. Get the type of the Python object *o*. - Return a :term:`borrowed reference`. - - Use the :c:func:`Py_SET_TYPE` function to set an object type. + The returned reference is :term:`borrowed ` from *o*. + Do not release it with :c:func:`Py_DECREF` or similar. .. versionchanged:: 3.11 :c:func:`Py_TYPE()` is changed to an inline static function. @@ -122,16 +155,26 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_TYPE(PyObject *o, PyTypeObject *type) - Set the object *o* type to *type*. + Set the type of object *o* to *type*, without any checking or reference + counting. + + This is a very low-level operation. + Consider instead setting the Python attribute :attr:`~object.__class__` + using :c:func:`PyObject_SetAttrString` or similar. + + Note that assigning an incompatible type can lead to undefined behavior. + + If *type* is a :ref:`heap type `, the caller must create a + new reference to it. + Similarly, if the old type of *o* is a heap type, the caller must release + a reference to that type. .. versionadded:: 3.9 .. c:function:: Py_ssize_t Py_SIZE(PyVarObject *o) - Get the size of the Python object *o*. - - Use the :c:func:`Py_SET_SIZE` function to set an object size. + Get the :c:member:`~PyVarObject.ob_size` field of *o*. .. versionchanged:: 3.11 :c:func:`Py_SIZE()` is changed to an inline static function. @@ -140,7 +183,7 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_SIZE(PyVarObject *o, Py_ssize_t size) - Set the object *o* size to *size*. + Set the :c:member:`~PyVarObject.ob_size` field of *o* to *size*. .. versionadded:: 3.9 diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index af2bead3bb5004..060d6f60174b41 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -492,9 +492,9 @@ metatype) initializes :c:member:`~PyTypeObject.tp_itemsize`, which means that it type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. -.. c:member:: Py_ssize_t PyObject.ob_refcnt +:c:member:`PyObject.ob_refcnt` - This is the type object's reference count, initialized to ``1`` by the + The type object's reference count is initialized to ``1`` by the ``PyObject_HEAD_INIT`` macro. Note that for :ref:`statically allocated type objects `, the type's instances (objects whose :c:member:`~PyObject.ob_type` points back to the type) do *not* count as references. But for @@ -506,7 +506,7 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. This field is not inherited by subtypes. -.. c:member:: PyTypeObject* PyObject.ob_type +:c:member:`PyObject.ob_type` This is the type's type, in other words its metatype. It is initialized by the argument to the ``PyObject_HEAD_INIT`` macro, and its value should normally be @@ -532,14 +532,13 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. PyVarObject Slots ----------------- -.. c:member:: Py_ssize_t PyVarObject.ob_size +:c:member:`PyVarObject.ob_size` For :ref:`statically allocated type objects `, this should be initialized to zero. For :ref:`dynamically allocated type objects `, this field has a special internal meaning. - This field should be accessed using the :c:func:`Py_SIZE()` and - :c:func:`Py_SET_SIZE()` macros. + This field should be accessed using the :c:func:`Py_SIZE()` macro. **Inheritance:** From b7aa2a4b4df697db6ea45a555eeb3fefa5ca5bd4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Jul 2025 15:14:17 +0300 Subject: [PATCH 800/989] gh-124486: Fix test_whichdb_ndbm in test_dbm on NetBSD (GH-136335) On NetBSD, ndbm.open() does not fail for empty file. --- Lib/test/test_dbm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 7e8d78b89405ab..ae9faabd536a6c 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -274,7 +274,8 @@ def test_whichdb(self): @unittest.skipUnless(ndbm, reason='Test requires ndbm') def test_whichdb_ndbm(self): # Issue 17198: check that ndbm which is referenced in whichdb is defined - with open(_fname + '.db', 'wb'): pass + with open(_fname + '.db', 'wb') as f: + f.write(b'spam') _bytes_fname = os.fsencode(_fname) fnames = [_fname, os_helper.FakePath(_fname), _bytes_fname, os_helper.FakePath(_bytes_fname)] From c45da6ae16a0bc796e880c9dbcd4f643345b03b7 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Mon, 7 Jul 2025 15:29:27 +0200 Subject: [PATCH 801/989] gh-136155: Docs: only add custom OpenGraph protocol meta tags for HTML builds (#136187) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Doc/conf.py | 19 ++++++++++--------- ...-07-01-21-04-47.gh-issue-136155.ufmH4Q.rst | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2025-07-01-21-04-47.gh-issue-136155.ufmH4Q.rst diff --git a/Doc/conf.py b/Doc/conf.py index 161c2986441edd..8b2a8f20fcc558 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -635,13 +635,14 @@ 'image': '_static/og-image.png', 'line_color': '#3776ab', } -ogp_custom_meta_tags = [ - '', -] -if 'create-social-cards' not in tags: # noqa: F821 - # Define a static preview image when not creating social cards - ogp_image = '_static/og-image.png' - ogp_custom_meta_tags += [ - '', - '', +if 'builder_html' in tags: # noqa: F821 + ogp_custom_meta_tags = [ + '', ] + if 'create-social-cards' not in tags: # noqa: F821 + # Define a static preview image when not creating social cards + ogp_image = '_static/og-image.png' + ogp_custom_meta_tags += [ + '', + '', + ] diff --git a/Misc/NEWS.d/next/Documentation/2025-07-01-21-04-47.gh-issue-136155.ufmH4Q.rst b/Misc/NEWS.d/next/Documentation/2025-07-01-21-04-47.gh-issue-136155.ufmH4Q.rst new file mode 100644 index 00000000000000..0341b5f7f0d5e6 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2025-07-01-21-04-47.gh-issue-136155.ufmH4Q.rst @@ -0,0 +1 @@ +EPUB builds are fixed by excluding non-XHTML-compatible tags. From fb170cf50d0c71d7a036ac6f0db5ee19b46f6b57 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 7 Jul 2025 17:57:48 +0200 Subject: [PATCH 802/989] gh-120713: Make _Py_NORMALIZE_CENTURY private (GH-135933) --- Modules/_datetimemodule.c | 2 +- configure | 2 +- configure.ac | 2 +- pyconfig.h.in | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index eb90be81c8d34c..7a6426593d021f 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1934,7 +1934,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, } replacement = freplacement; } -#ifdef Py_NORMALIZE_CENTURY +#ifdef _Py_NORMALIZE_CENTURY else if (ch == 'Y' || ch == 'G' || ch == 'F' || ch == 'C' ) { diff --git a/configure b/configure index 9df366697b8546..4a0f8959c8727d 100755 --- a/configure +++ b/configure @@ -28218,7 +28218,7 @@ printf "%s\n" "$ac_cv_normalize_century" >&6; } if test "$ac_cv_normalize_century" = yes then -printf "%s\n" "#define Py_NORMALIZE_CENTURY 1" >>confdefs.h +printf "%s\n" "#define _Py_NORMALIZE_CENTURY 1" >>confdefs.h fi diff --git a/configure.ac b/configure.ac index cb7f2144345b37..10d7a0c6056107 100644 --- a/configure.ac +++ b/configure.ac @@ -6803,7 +6803,7 @@ int main(void) [ac_cv_normalize_century=yes])]) if test "$ac_cv_normalize_century" = yes then - AC_DEFINE([Py_NORMALIZE_CENTURY], [1], + AC_DEFINE([_Py_NORMALIZE_CENTURY], [1], [Define if year with century should be normalized for strftime.]) fi diff --git a/pyconfig.h.in b/pyconfig.h.in index d7c496fccc682c..1c533b2bfb7fb4 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1737,9 +1737,6 @@ SipHash13: 3, externally defined: 0 */ #undef Py_HASH_ALGORITHM -/* Define if year with century should be normalized for strftime. */ -#undef Py_NORMALIZE_CENTURY - /* Define if you want to enable remote debugging support. */ #undef Py_REMOTE_DEBUG @@ -2026,6 +2023,9 @@ /* HACL* library can compile SIMD256 implementations */ #undef _Py_HACL_CAN_COMPILE_VEC256 +/* Define if year with century should be normalized for strftime. */ +#undef _Py_NORMALIZE_CENTURY + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 0b62523959672509cbd705c926d426c7af1ebe40 Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:28:44 +0800 Subject: [PATCH 803/989] gh-101100: Fix Sphinx warnings in library/email.compat32-message.rst (#136323) --- Doc/library/email.compat32-message.rst | 18 +++++++++--------- Doc/tools/.nitignore | 1 - 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Doc/library/email.compat32-message.rst b/Doc/library/email.compat32-message.rst index 4285c436e8da80..5754c2b65b239f 100644 --- a/Doc/library/email.compat32-message.rst +++ b/Doc/library/email.compat32-message.rst @@ -181,7 +181,7 @@ Here are the methods of the :class:`Message` class: :meth:`set_payload` instead. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content` and the related ``make`` and ``add`` methods. @@ -224,7 +224,7 @@ Here are the methods of the :class:`Message` class: ASCII charset. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.get_content` and :meth:`~email.message.EmailMessage.iter_parts`. @@ -236,7 +236,7 @@ Here are the methods of the :class:`Message` class: the message's default character set; see :meth:`set_charset` for details. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content`. @@ -265,9 +265,9 @@ Here are the methods of the :class:`Message` class: using that :mailheader:`Content-Transfer-Encoding` and is not modified. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *charset* parameter of the - :meth:`email.emailmessage.EmailMessage.set_content` method. + :meth:`email.message.EmailMessage.set_content` method. .. method:: get_charset() @@ -276,7 +276,7 @@ Here are the methods of the :class:`Message` class: message's payload. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class it always returns + :class:`~email.message.EmailMessage` class it always returns ``None``. @@ -486,7 +486,7 @@ Here are the methods of the :class:`Message` class: search instead of :mailheader:`Content-Type`. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -524,7 +524,7 @@ Here are the methods of the :class:`Message` class: to ``False``. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -579,7 +579,7 @@ Here are the methods of the :class:`Message` class: header is also added. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the ``make_`` and ``add_`` methods. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 88f22db56f9684..eb760e0fac5e37 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -15,7 +15,6 @@ Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/email.charset.rst -Doc/library/email.compat32-message.rst Doc/library/email.parser.rst Doc/library/exceptions.rst Doc/library/functools.rst From fe187fae8d8321f1b8d3c9560a35efe904de4217 Mon Sep 17 00:00:00 2001 From: Charlie Lin Date: Mon, 7 Jul 2025 16:56:14 +0000 Subject: [PATCH 804/989] gh-135906: Use `_PyObject_CAST` in internal headers (GH-135892) Fixes build errors encountered in python-greenlet/greenlet#450 when building greenlet on the free-threaded build. --------- Co-authored-by: Peter Bierma Co-authored-by: Victor Stinner --- Include/internal/pycore_object.h | 4 ++-- Include/internal/pycore_stackref.h | 2 +- .../next/C_API/2025-06-25-01-03-10.gh-issue-135906.UBrCWq.rst | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-06-25-01-03-10.gh-issue-135906.UBrCWq.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8fe9875fae0687..40f8ca68c00b72 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -614,7 +614,7 @@ static inline PyObject * _Py_XGetRef(PyObject **ptr) { for (;;) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } @@ -629,7 +629,7 @@ _Py_XGetRef(PyObject **ptr) static inline PyObject * _Py_TryXGetRef(PyObject **ptr) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 48a40a4c3479c7..6bf82d8322f508 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -829,7 +829,7 @@ _Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) static inline int _Py_TryXGetStackRef(PyObject **src, _PyStackRef *out) { - PyObject *op = _Py_atomic_load_ptr_relaxed(src); + PyObject *op = _PyObject_CAST(_Py_atomic_load_ptr_relaxed(src)); if (op == NULL) { *out = PyStackRef_NULL; return 1; diff --git a/Misc/NEWS.d/next/C_API/2025-06-25-01-03-10.gh-issue-135906.UBrCWq.rst b/Misc/NEWS.d/next/C_API/2025-06-25-01-03-10.gh-issue-135906.UBrCWq.rst new file mode 100644 index 00000000000000..7852759a702804 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-06-25-01-03-10.gh-issue-135906.UBrCWq.rst @@ -0,0 +1 @@ +Fix compilation errors when compiling the internal headers with a C++ compiler. From 0240ef4705d835e27beb2437dfadb5d34aa2db17 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 7 Jul 2025 23:30:27 +0530 Subject: [PATCH 805/989] gh-98388: add tests for happy eyeballs (#136368) --- Lib/test/test_asyncio/test_base_events.py | 88 +++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index 12179eb0c9e274..22ae0ef3581156 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -150,6 +150,29 @@ def test_ipaddr_info_no_inet_pton(self, m_socket): socket.SOCK_STREAM, socket.IPPROTO_TCP)) + def test_interleave_addrinfos(self): + self.maxDiff = None + SIX_A = (socket.AF_INET6, 0, 0, '', ('2001:db8::1', 1)) + SIX_B = (socket.AF_INET6, 0, 0, '', ('2001:db8::2', 2)) + SIX_C = (socket.AF_INET6, 0, 0, '', ('2001:db8::3', 3)) + SIX_D = (socket.AF_INET6, 0, 0, '', ('2001:db8::4', 4)) + FOUR_A = (socket.AF_INET, 0, 0, '', ('192.0.2.1', 5)) + FOUR_B = (socket.AF_INET, 0, 0, '', ('192.0.2.2', 6)) + FOUR_C = (socket.AF_INET, 0, 0, '', ('192.0.2.3', 7)) + FOUR_D = (socket.AF_INET, 0, 0, '', ('192.0.2.4', 8)) + + addrinfos = [SIX_A, SIX_B, SIX_C, FOUR_A, FOUR_B, FOUR_C, FOUR_D, SIX_D] + expected = [SIX_A, FOUR_A, SIX_B, FOUR_B, SIX_C, FOUR_C, SIX_D, FOUR_D] + + self.assertEqual(expected, base_events._interleave_addrinfos(addrinfos)) + + expected_fafc_2 = [SIX_A, SIX_B, FOUR_A, SIX_C, FOUR_B, SIX_D, FOUR_C, FOUR_D] + self.assertEqual( + expected_fafc_2, + base_events._interleave_addrinfos(addrinfos, first_address_family_count=2), + ) + + class BaseEventLoopTests(test_utils.TestCase): @@ -1053,6 +1076,71 @@ def test_asyncgen_finalization_by_gc_in_other_thread(self): test_utils.run_briefly(self.loop) self.assertTrue(status['finalized']) + @unittest.skipUnless(socket_helper.IPV6_ENABLED, 'no IPv6 support') + @patch_socket + def test_create_connection_happy_eyeballs(self, m_socket): + + class MyProto(asyncio.Protocol): + pass + + async def getaddrinfo(*args, **kw): + return [(socket.AF_INET6, 0, 0, '', ('2001:db8::1', 1)), + (socket.AF_INET, 0, 0, '', ('192.0.2.1', 5))] + + async def sock_connect(sock, address): + if address[0] == '2001:db8::1': + await asyncio.sleep(1) + sock.connect(address) + + loop = asyncio.new_event_loop() + loop._add_writer = mock.Mock() + loop._add_writer = mock.Mock() + loop._add_reader = mock.Mock() + loop.getaddrinfo = getaddrinfo + loop.sock_connect = sock_connect + + coro = loop.create_connection(MyProto, 'example.com', 80, happy_eyeballs_delay=0.3) + transport, protocol = loop.run_until_complete(coro) + try: + sock = transport._sock + sock.connect.assert_called_with(('192.0.2.1', 5)) + finally: + transport.close() + test_utils.run_briefly(loop) # allow transport to close + loop.close() + + @patch_socket + def test_create_connection_happy_eyeballs_ipv4_only(self, m_socket): + + class MyProto(asyncio.Protocol): + pass + + async def getaddrinfo(*args, **kw): + return [(socket.AF_INET, 0, 0, '', ('192.0.2.1', 5)), + (socket.AF_INET, 0, 0, '', ('192.0.2.2', 6))] + + async def sock_connect(sock, address): + if address[0] == '192.0.2.1': + await asyncio.sleep(1) + sock.connect(address) + + loop = asyncio.new_event_loop() + loop._add_writer = mock.Mock() + loop._add_writer = mock.Mock() + loop._add_reader = mock.Mock() + loop.getaddrinfo = getaddrinfo + loop.sock_connect = sock_connect + + coro = loop.create_connection(MyProto, 'example.com', 80, happy_eyeballs_delay=0.3) + transport, protocol = loop.run_until_complete(coro) + try: + sock = transport._sock + sock.connect.assert_called_with(('192.0.2.2', 6)) + finally: + transport.close() + test_utils.run_briefly(loop) # allow transport to close + loop.close() + class MyProto(asyncio.Protocol): done = None From fbef0c1d6a6540f5063138b861f79bdb2a935b0a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 8 Jul 2025 08:59:00 +0300 Subject: [PATCH 806/989] gh-102567: Add missing newline to `--help-all` (GH-136391) --- Python/initconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/initconfig.c b/Python/initconfig.c index 71d7cfed5c44c1..73a9a9bf1ca460 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -312,7 +312,7 @@ The following implementation-specific options are available:\n\ "-X gil=[0|1]: enable (1) or disable (0) the GIL; also PYTHON_GIL\n" #endif "\ --X importtime[=2]: show how long each import takes; use -X importtime=2 to\ +-X importtime[=2]: show how long each import takes; use -X importtime=2 to\n\ log imports of already-loaded modules; also PYTHONPROFILEIMPORTTIME\n\ -X int_max_str_digits=N: limit the size of int<->str conversions;\n\ 0 disables the limit; also PYTHONINTMAXSTRDIGITS\n\ From 5b78c85fb4cde94bb2637f683025ff71af76d48a Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Tue, 8 Jul 2025 14:25:13 +0800 Subject: [PATCH 807/989] gh-101100: Fix sphinx warnings in `whatsnew/3.11.rst` (#136402) --- Doc/tools/.nitignore | 1 - Doc/whatsnew/3.11.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index eb760e0fac5e37..e29775b075a82f 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -73,4 +73,3 @@ Doc/whatsnew/3.6.rst Doc/whatsnew/3.7.rst Doc/whatsnew/3.8.rst Doc/whatsnew/3.10.rst -Doc/whatsnew/3.11.rst diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 2dd205dd2b8831..abf9677fd9cac5 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1292,7 +1292,7 @@ This section covers specific optimizations independent of the (Contributed by Stefan Behnel in :gh:`68264`.) * Resizing lists is streamlined for the common case, - speeding up :meth:`list.append` by ≈15% + speeding up :meth:`!list.append` by ≈15% and simple :term:`list comprehension`\s by up to 20-30% (Contributed by Dennis Sweeney in :gh:`91165`.) From 89f06a38c07f8ad72611e3bb545484d71a8a8d1b Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 8 Jul 2025 13:04:50 +0530 Subject: [PATCH 808/989] gh-134043: use `_PyObject_GetMethodStackRef` in pattern matching (#136356) --- Python/ceval.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 50665defd382a2..291e753dec0ce5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -627,12 +627,14 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) PyObject *seen = NULL; PyObject *dummy = NULL; PyObject *values = NULL; - PyObject *get = NULL; // We use the two argument form of map.get(key, default) for two reasons: // - Atomically check for a key and get its value without error handling. // - Don't cause key creation or resizing in dict subclasses like // collections.defaultdict that define __missing__ (or similar). - int meth_found = _PyObject_GetMethod(map, &_Py_ID(get), &get); + _PyCStackRef cref; + _PyThreadState_PushCStackRef(tstate, &cref); + int meth_found = _PyObject_GetMethodStackRef(tstate, map, &_Py_ID(get), &cref.ref); + PyObject *get = PyStackRef_AsPyObjectBorrow(cref.ref); if (get == NULL) { goto fail; } @@ -682,12 +684,12 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) } // Success: done: - Py_DECREF(get); + _PyThreadState_PopCStackRef(tstate, &cref); Py_DECREF(seen); Py_DECREF(dummy); return values; fail: - Py_XDECREF(get); + _PyThreadState_PopCStackRef(tstate, &cref); Py_XDECREF(seen); Py_XDECREF(dummy); Py_XDECREF(values); From 51934000babed30c35dd49787145785a2d8f722d Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 8 Jul 2025 13:05:24 +0530 Subject: [PATCH 809/989] gh-117657: enable `test_capi` under TSAN (#136269) --- Lib/test/libregrtest/tsan.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 3545c5f999ff70..f1f8c8bde920ae 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -3,9 +3,7 @@ TSAN_TESTS = [ 'test_asyncio', - # TODO: enable more of test_capi once bugs are fixed (GH-116908, GH-116909). - 'test_capi.test_mem', - 'test_capi.test_pyatomic', + 'test_capi', 'test_code', 'test_ctypes', 'test_concurrent_futures', From db699db99d214dccb7c4849fa609fdd5188ee038 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 8 Jul 2025 10:51:36 +0300 Subject: [PATCH 810/989] gh-136297: Fix `hypothesis` and `subTest` usage in `test_zoneinfo_property.py` (#136384) --- Lib/test/test_zoneinfo/test_zoneinfo_property.py | 16 +++++++--------- Tools/requirements-hypothesis.txt | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo_property.py b/Lib/test/test_zoneinfo/test_zoneinfo_property.py index 294c7e9b27a857..c00815e2fd4c36 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo_property.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo_property.py @@ -147,23 +147,21 @@ def setUp(self): def test_pickle_unpickle_cache(self, key): zi = self.klass(key) for proto in range(pickle.HIGHEST_PROTOCOL + 1): - with self.subTest(proto=proto): - pkl_str = pickle.dumps(zi, proto) - zi_rt = pickle.loads(pkl_str) + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIs(zi, zi_rt) + self.assertIs(zi, zi_rt) @hypothesis.given(key=valid_keys()) @add_key_examples def test_pickle_unpickle_no_cache(self, key): zi = self.klass.no_cache(key) for proto in range(pickle.HIGHEST_PROTOCOL + 1): - with self.subTest(proto=proto): - pkl_str = pickle.dumps(zi, proto) - zi_rt = pickle.loads(pkl_str) + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIsNot(zi, zi_rt) - self.assertEqual(str(zi), str(zi_rt)) + self.assertIsNot(zi, zi_rt) + self.assertEqual(str(zi), str(zi_rt)) @hypothesis.given(key=valid_keys()) @add_key_examples diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 66898885c0a412..e5deac497fbe3f 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.111.2 +hypothesis==6.135.26 From 0152df5fffb5fc4e538f62543a658d3980079de5 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Tue, 8 Jul 2025 21:05:05 +0900 Subject: [PATCH 811/989] gh-101100: Fix sphinx warnings in `Doc/library/exceptions.rst` (#136309) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/exceptions.rst | 30 +++++++++++++++++++----------- Doc/tools/.nitignore | 1 - 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index c09e1615a5b733..a73cc2e8a2dd3e 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -204,10 +204,16 @@ The following exceptions are the exceptions that are usually raised. assignment fails. (When an object does not support attribute references or attribute assignments at all, :exc:`TypeError` is raised.) - The :attr:`name` and :attr:`obj` attributes can be set using keyword-only - arguments to the constructor. When set they represent the name of the attribute - that was attempted to be accessed and the object that was accessed for said - attribute, respectively. + The optional *name* and *obj* keyword-only arguments + set the corresponding attributes: + + .. attribute:: name + + The name of the attribute that was attempted to be accessed. + + .. attribute:: obj + + The object that was accessed for the named attribute. .. versionchanged:: 3.10 Added the :attr:`name` and :attr:`obj` attributes. @@ -215,7 +221,7 @@ The following exceptions are the exceptions that are usually raised. .. exception:: EOFError Raised when the :func:`input` function hits an end-of-file condition (EOF) - without reading any data. (N.B.: the :meth:`io.IOBase.read` and + without reading any data. (Note: the :meth:`!io.IOBase.read` and :meth:`io.IOBase.readline` methods return an empty string when they hit EOF.) @@ -312,9 +318,11 @@ The following exceptions are the exceptions that are usually raised. unqualified names. The associated value is an error message that includes the name that could not be found. - The :attr:`name` attribute can be set using a keyword-only argument to the - constructor. When set it represent the name of the variable that was attempted - to be accessed. + The optional *name* keyword-only argument sets the attribute: + + .. attribute:: name + + The name of the variable that was attempted to be accessed. .. versionchanged:: 3.10 Added the :attr:`name` attribute. @@ -382,7 +390,7 @@ The following exceptions are the exceptions that are usually raised. The corresponding error message, as provided by the operating system. It is formatted by the C - functions :c:func:`perror` under POSIX, and :c:func:`FormatMessage` + functions :c:func:`!perror` under POSIX, and :c:func:`!FormatMessage` under Windows. .. attribute:: filename @@ -398,7 +406,7 @@ The following exceptions are the exceptions that are usually raised. .. versionchanged:: 3.3 :exc:`EnvironmentError`, :exc:`IOError`, :exc:`WindowsError`, :exc:`socket.error`, :exc:`select.error` and - :exc:`mmap.error` have been merged into :exc:`OSError`, and the + :exc:`!mmap.error` have been merged into :exc:`OSError`, and the constructor may return a subclass. .. versionchanged:: 3.4 @@ -597,7 +605,7 @@ The following exceptions are the exceptions that are usually raised. handled, the Python interpreter exits; no stack traceback is printed. The constructor accepts the same optional argument passed to :func:`sys.exit`. If the value is an integer, it specifies the system exit status (passed to - C's :c:func:`exit` function); if it is ``None``, the exit status is zero; if + C's :c:func:`!exit` function); if it is ``None``, the exit status is zero; if it has another type (such as a string), the object's value is printed and the exit status is one. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index e29775b075a82f..4f5396857f3024 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -16,7 +16,6 @@ Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/email.charset.rst Doc/library/email.parser.rst -Doc/library/exceptions.rst Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst From ba9c1986305517ed88470129fe7c71aaec22d08d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 8 Jul 2025 13:23:31 +0100 Subject: [PATCH 812/989] gh-136186: Fix race condition in test_external_inspection.test_only_active_thread (#136347) --- Lib/test/test_external_inspection.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 0f31c225e68de3..44890ebfe5f76d 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -5,6 +5,7 @@ import sys import socket import threading +import time from asyncio import staggered, taskgroups, base_events, tasks from unittest.mock import ANY from test.support import os_helper, SHORT_TIMEOUT, busy_retry, requires_gil_enabled @@ -930,9 +931,6 @@ def main_work(): # Signal threads to start waiting ready_event.set() - # Give threads time to start sleeping - time.sleep(0.1) - # Now do busy work to hold the GIL main_work() """ @@ -967,7 +965,23 @@ def main_work(): # Get stack trace with all threads unwinder_all = RemoteUnwinder(p.pid, all_threads=True) - all_traces = unwinder_all.get_stack_trace() + for _ in range(10): + # Wait for the main thread to start its busy work + all_traces = unwinder_all.get_stack_trace() + found = False + for thread_id, stack in all_traces: + if not stack: + continue + current_frame = stack[0] + if current_frame.funcname == "main_work" and current_frame.lineno >15: + found = True + + if found: + break + # Give a bit of time to take the next sample + time.sleep(0.1) + else: + self.fail("Main thread did not start its busy work on time") # Get stack trace with only GIL holder unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True) From 490eea02819ad303a5042529af7507b7b1fdabdc Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 8 Jul 2025 22:32:14 +0900 Subject: [PATCH 813/989] gh-136380: Fix import behavior for `concurrent.futures.InterpreterPoolExecutor` (#136381) Co-authored-by: Serhiy Storchaka Co-authored-by: Peter Bierma Co-authored-by: sobolevn --- Lib/concurrent/futures/__init__.py | 35 ++++++++-------- .../test_interpreter_pool.py | 41 +++++++++++++++++++ ...-07-07-22-12-32.gh-issue-136380.1b_nXl.rst | 3 ++ 3 files changed, 62 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 7ada7431c1ab8c..e717222cf98b32 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -17,7 +17,7 @@ wait, as_completed) -__all__ = ( +__all__ = [ 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', @@ -29,10 +29,18 @@ 'Executor', 'wait', 'as_completed', - 'InterpreterPoolExecutor', 'ProcessPoolExecutor', 'ThreadPoolExecutor', -) +] + + +try: + import _interpreters +except ImportError: + _interpreters = None + +if _interpreters: + __all__.append('InterpreterPoolExecutor') def __dir__(): @@ -43,22 +51,15 @@ def __getattr__(name): global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': - from .process import ProcessPoolExecutor as pe - ProcessPoolExecutor = pe - return pe + from .process import ProcessPoolExecutor + return ProcessPoolExecutor if name == 'ThreadPoolExecutor': - from .thread import ThreadPoolExecutor as te - ThreadPoolExecutor = te - return te + from .thread import ThreadPoolExecutor + return ThreadPoolExecutor - if name == 'InterpreterPoolExecutor': - try: - from .interpreter import InterpreterPoolExecutor as ie - except ModuleNotFoundError: - ie = InterpreterPoolExecutor = None - else: - InterpreterPoolExecutor = ie - return ie + if _interpreters and name == 'InterpreterPoolExecutor': + from .interpreter import InterpreterPoolExecutor + return InterpreterPoolExecutor raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index 844dfdd6fc901c..b10bbebd0984c4 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -2,7 +2,9 @@ import contextlib import io import os +import subprocess import sys +import textwrap import time import unittest from concurrent.futures.interpreter import BrokenInterpreterPool @@ -457,6 +459,45 @@ def test_free_reference(self): # Weak references don't cross between interpreters. raise unittest.SkipTest('not applicable') + @support.requires_subprocess() + def test_import_interpreter_pool_executor(self): + # Test the import behavior normally if _interpreters is unavailable. + code = textwrap.dedent(""" + import sys + # Set it to None to emulate the case when _interpreter is unavailable. + sys.modules['_interpreters'] = None + from concurrent import futures + + try: + futures.InterpreterPoolExecutor + except AttributeError: + pass + else: + print('AttributeError not raised!', file=sys.stderr) + sys.exit(1) + + try: + from concurrent.futures import InterpreterPoolExecutor + except ImportError: + pass + else: + print('ImportError not raised!', file=sys.stderr) + sys.exit(1) + + from concurrent.futures import * + + if 'InterpreterPoolExecutor' in globals(): + print('InterpreterPoolExecutor should not be imported!', + file=sys.stderr) + sys.exit(1) + """) + + cmd = [sys.executable, '-c', code] + p = subprocess.run(cmd, capture_output=True) + self.assertEqual(p.returncode, 0, p.stderr.decode()) + self.assertEqual(p.stdout.decode(), '') + self.assertEqual(p.stderr.decode(), '') + class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst b/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst new file mode 100644 index 00000000000000..4ac04b9c0a6c5c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst @@ -0,0 +1,3 @@ +Raises :exc:`AttributeError` when accessing +:class:`concurrent.futures.InterpreterPoolExecutor` and subinterpreters are +not available. From 6c81e8c57a1d291863c6beaa42392a7f1cf52854 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 8 Jul 2025 18:39:47 +0200 Subject: [PATCH 814/989] gh-136156: Allow using linkat() with TemporaryFile (#136281) tempfile.TemporaryFile() no longer uses os.O_EXCL with os.O_TMPFILE, so it's possible to use linkat() on the file descriptor. --- Lib/tempfile.py | 2 +- Lib/test/test_tempfile.py | 23 +++++++++++++++++++ ...-07-04-12-53-02.gh-issue-136156.OYlXoz.rst | 3 +++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-04-12-53-02.gh-issue-136156.OYlXoz.rst diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 5e3ccab5f48502..53d14ff5c67131 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -656,7 +656,7 @@ def TemporaryFile(mode='w+b', buffering=-1, encoding=None, fd = None def opener(*args): nonlocal fd - flags2 = (flags | _os.O_TMPFILE) & ~_os.O_CREAT + flags2 = (flags | _os.O_TMPFILE) & ~_os.O_CREAT & ~_os.O_EXCL fd = _os.open(dir, flags2, 0o600) return fd try: diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index 52b13b98cbcce5..36151b016ea35b 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1594,6 +1594,29 @@ def test_unexpected_error(self): mock_close.assert_called() self.assertEqual(os.listdir(dir), []) + @unittest.skipUnless(tempfile._O_TMPFILE_WORKS, 'need os.O_TMPFILE') + @unittest.skipUnless(os.path.exists('/proc/self/fd'), + 'need /proc/self/fd') + def test_link_tmpfile(self): + dir = tempfile.mkdtemp() + self.addCleanup(os_helper.rmtree, dir) + filename = os.path.join(dir, "link") + + with tempfile.TemporaryFile('w', dir=dir) as tmp: + # the flag can become False on Linux <= 3.11 + if not tempfile._O_TMPFILE_WORKS: + self.skipTest("O_TMPFILE doesn't work") + + tmp.write("hello") + tmp.flush() + fd = tmp.fileno() + + os.link(f'/proc/self/fd/{fd}', + filename, + follow_symlinks=True) + with open(filename) as fp: + self.assertEqual(fp.read(), "hello") + # Helper for test_del_on_shutdown class NulledModules: diff --git a/Misc/NEWS.d/next/Library/2025-07-04-12-53-02.gh-issue-136156.OYlXoz.rst b/Misc/NEWS.d/next/Library/2025-07-04-12-53-02.gh-issue-136156.OYlXoz.rst new file mode 100644 index 00000000000000..95606790e991a4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-04-12-53-02.gh-issue-136156.OYlXoz.rst @@ -0,0 +1,3 @@ +:func:`tempfile.TemporaryFile` no longer uses :data:`os.O_EXCL` with +:data:`os.O_TMPFILE`, so it's possible to use ``linkat()`` on the file +descriptor. Patch by Victor Stinner. From bc9bc078df302dd5cc9928c42c37f4f6d414c767 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 8 Jul 2025 20:11:48 +0300 Subject: [PATCH 815/989] Update bytecode magic number in tests for the 3.14 release candidate (#136427) --- Lib/test/test_importlib/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_importlib/test_util.py b/Lib/test/test_importlib/test_util.py index 5de89714eb50c7..6d6d5f96aab4a8 100644 --- a/Lib/test/test_importlib/test_util.py +++ b/Lib/test/test_importlib/test_util.py @@ -635,7 +635,7 @@ def test_magic_number(self): # stakeholders such as OS package maintainers must be notified # in advance. Such exceptional releases will then require an # adjustment to this test case. - EXPECTED_MAGIC_NUMBER = 3495 + EXPECTED_MAGIC_NUMBER = 3625 actual = int.from_bytes(importlib.util.MAGIC_NUMBER[:2], 'little') msg = ( From b6b99bf7f1edab77c485faf4e23da868f3a7b68c Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 12:19:57 -0700 Subject: [PATCH 816/989] GH-91636: Clear weakrefs created by finalizers. (GH-136401) Weakrefs to unreachable garbage that are created during running of finalizers need to be cleared. This avoids exposing objects that have `tp_clear` called on them to Python-level code. --- Lib/test/test_gc.py | 20 +++++++++------ ...5-07-07-17-26-06.gh-issue-91636.GyHU72.rst | 3 +++ Python/gc.c | 25 ++++++++++++++++--- Python/gc_free_threading.c | 20 ++++++++++++--- 4 files changed, 54 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..85c43055d0dcec 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,14 +295,18 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) @refcount_test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst new file mode 100644 index 00000000000000..09c192f9c5657e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst @@ -0,0 +1,3 @@ +While performing garbage collection, clear weakrefs to unreachable objects +that are created during running of finalizers. If those weakrefs were are +not cleared, they could reveal unreachable objects. diff --git a/Python/gc.c b/Python/gc.c index 02135a3fb442d6..88849a43680d2e 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -870,7 +870,7 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) * no object in `unreachable` is weakly referenced anymore. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) { PyGC_Head *gc; PyObject *op; /* generally FROM_GC(gc) */ @@ -879,7 +879,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyGC_Head *next; int num_freed = 0; - gc_list_init(&wrcb_to_call); + if (allow_callbacks) { + gc_list_init(&wrcb_to_call); + } /* Clear all weakrefs to the objects in unreachable. If such a weakref * also has a callback, move it into `wrcb_to_call` if the callback @@ -935,6 +937,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + + if (!allow_callbacks) { + continue; + } + if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -987,6 +994,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) } } + if (!allow_callbacks) { + return 0; + } + /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ @@ -1737,7 +1748,7 @@ gc_collect_region(PyThreadState *tstate, } /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); + stats->collected += handle_weakrefs(&unreachable, to, true); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1751,6 +1762,14 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + /* Clear weakrefs to objects in the unreachable set. No Python-level + * code must be allowed to access those unreachable objects. During + * delete_garbage(), finalizers outside the unreachable set might run + * and create new weakrefs. If those weakrefs were not cleared, they + * could reveal unreachable objects. Callbacks are not executed. + */ + handle_weakrefs(&final_unreachable, NULL, false); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5aaa68c5b51f95..d46598b23b3b2f 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1492,9 +1492,9 @@ move_legacy_finalizer_reachable(struct collection_state *state) } // Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// enqueued in `wrcb_to_call`, but not invoked yet. +// optionally enqueued in `wrcb_to_call`, but not invoked yet. static void -clear_weakrefs(struct collection_state *state) +clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { @@ -1526,6 +1526,10 @@ clear_weakrefs(struct collection_state *state) _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + if (!enqueue_callbacks) { + continue; + } + // We do not invoke callbacks for weakrefs that are themselves // unreachable. This is partly for historical reasons: weakrefs // predate safe object finalization, and a weakref that is itself @@ -2211,7 +2215,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, interp->gc.long_lived_total = state->long_lived_total; // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state); + clear_weakrefs(state, true); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2222,11 +2226,19 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, call_weakref_callbacks(state); finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. _PyEval_StopTheWorld(interp); + // Handle any objects that may have resurrected after the finalization. err = handle_resurrected_objects(state); // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); + if (err == 0) { + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might + // run and create new weakrefs. If those weakrefs were not cleared, + // they could reveal unreachable objects. + clear_weakrefs(state, false); + } _PyEval_StartTheWorld(interp); if (err < 0) { From ffd7f2f231f5543e6863c6c85e86f72233229771 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 8 Jul 2025 23:34:48 +0200 Subject: [PATCH 817/989] gh-136162: Document `encodings` package functions (#136164) Closes #136162. --- Doc/library/codecs.rst | 60 ++++++++++++++++++++++++++++++++++++++++++ Doc/whatsnew/3.9.rst | 2 +- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index b231fa568cf342..efe211cd2db6c8 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1484,6 +1484,66 @@ mapping. It is not supported by :meth:`str.encode` (which only produces Restoration of the ``rot13`` alias. +:mod:`encodings` --- Encodings package +-------------------------------------- + +.. module:: encodings + :synopsis: Encodings package + +This module implements the following functions: + +.. function:: normalize_encoding(encoding) + + Normalize encoding name *encoding*. + + Normalization works as follows: all non-alphanumeric characters except the + dot used for Python package names are collapsed and replaced with a single + underscore, leading and trailing underscores are removed. + For example, ``' -;#'`` becomes ``'_'``. + + Note that *encoding* should be ASCII only. + + +.. note:: + The following functions should not be used directly, except for testing + purposes; :func:`codecs.lookup` should be used instead. + + +.. function:: search_function(encoding) + + Search for the codec module corresponding to the given encoding name + *encoding*. + + This function first normalizes the *encoding* using + :func:`normalize_encoding`, then looks for a corresponding alias. + It attempts to import a codec module from the encodings package using either + the alias or the normalized name. If the module is found and defines a valid + ``getregentry()`` function that returns a :class:`codecs.CodecInfo` object, + the codec is cached and returned. + + If the codec module defines a ``getaliases()`` function any returned aliases + are registered for future use. + + +.. function:: win32_code_page_search_function(encoding) + + Search for a Windows code page encoding *encoding* of the form ``cpXXXX``. + + If the code page is valid and supported, return a :class:`codecs.CodecInfo` + object for it. + + .. availability:: Windows. + + .. versionadded:: 3.14 + + +This module implements the following exception: + +.. exception:: CodecRegistryError + + Raised when a codec is invalid or incompatible. + + :mod:`encodings.idna` --- Internationalized Domain Names in Applications ------------------------------------------------------------------------ diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 7fd9e6ac66e6c8..40d4a27bff9fee 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1139,7 +1139,7 @@ Changes in the Python API (Contributed by Christian Heimes in :issue:`36384`). * :func:`codecs.lookup` now normalizes the encoding name the same way as - :func:`!encodings.normalize_encoding`, except that :func:`codecs.lookup` also + :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case. For example, ``"latex+latin1"`` encoding name is now normalized to ``"latex_latin1"``. (Contributed by Jordon Xu in :issue:`37751`.) From a6566e49e63219b6dad6034fb046bd5b54f7af1a Mon Sep 17 00:00:00 2001 From: Arseniy Terekhin Date: Tue, 8 Jul 2025 14:49:55 -0700 Subject: [PATCH 818/989] gh-92536: Fix comment about number of unicode string types (#136439) --- Include/cpython/unicodeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 86c502730f478c..73e3bc44d6c9ca 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -109,7 +109,7 @@ struct _PyUnicodeObject_state { immediately follow the structure. utf8_length can be found in the length field; the utf8 pointer is equal to the data pointer. */ typedef struct { - /* There are 4 forms of Unicode strings: + /* There are 3 forms of Unicode strings: - compact ascii: From 77d25e5b169f7c306d3a6d9ca6777c0a0be80d8f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 9 Jul 2025 01:31:17 +0100 Subject: [PATCH 819/989] gh-91048: Revert the memory cache removal for remote debugging (#136440) gh-91048: Reintroduce the memory cache for remote debugging --- Modules/_remote_debugging_module.c | 10 ++++ Python/remote_debug.h | 78 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index ce7189637c2d69..b2d4d1ae650814 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -945,6 +945,10 @@ parse_coro_chain( return -1; } + if (name == NULL) { + return 0; + } + if (PyList_Append(render_to, name)) { Py_DECREF(name); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); @@ -2762,6 +2766,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self } exit: + _Py_RemoteDebug_ClearCache(&self->handle); return result; } @@ -2885,9 +2890,11 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s goto result_err; } + _Py_RemoteDebug_ClearCache(&self->handle); return result; result_err: + _Py_RemoteDebug_ClearCache(&self->handle); Py_XDECREF(result); return NULL; } @@ -2954,9 +2961,11 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject goto cleanup; } + _Py_RemoteDebug_ClearCache(&self->handle); return result; cleanup: + _Py_RemoteDebug_ClearCache(&self->handle); Py_XDECREF(result); return NULL; } @@ -2982,6 +2991,7 @@ RemoteUnwinder_dealloc(PyObject *op) } #endif if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); _Py_RemoteDebug_CleanupProcHandle(&self->handle); } PyObject_Del(self); diff --git a/Python/remote_debug.h b/Python/remote_debug.h index d1fcb478d2b035..8f9b6cd4c4960f 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -110,6 +110,14 @@ get_page_size(void) { return page_size; } +typedef struct page_cache_entry { + uintptr_t page_addr; // page-aligned base address + char *data; + int valid; + struct page_cache_entry *next; +} page_cache_entry_t; + +#define MAX_PAGES 1024 // Define a platform-independent process handle structure typedef struct { @@ -121,9 +129,27 @@ typedef struct { #elif defined(__linux__) int memfd; #endif + page_cache_entry_t pages[MAX_PAGES]; Py_ssize_t page_size; } proc_handle_t; +static void +_Py_RemoteDebug_FreePageCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + PyMem_RawFree(handle->pages[i].data); + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } +} + +UNUSED static void +_Py_RemoteDebug_ClearCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].valid = 0; + } +} #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX static mach_port_t pid_to_task(pid_t pid); @@ -152,6 +178,10 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->memfd = -1; #endif handle->page_size = get_page_size(); + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } return 0; } @@ -170,6 +200,7 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { } #endif handle->pid = 0; + _Py_RemoteDebug_FreePageCache(handle); } #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX @@ -1035,6 +1066,53 @@ _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, size_t size, void *out) { + size_t page_size = handle->page_size; + uintptr_t page_base = addr & ~(page_size - 1); + size_t offset_in_page = addr - page_base; + + if (offset_in_page + size > page_size) { + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); + } + + // Search for valid cached page + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (entry->valid && entry->page_addr == page_base) { + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + + // Find reusable slot + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (!entry->valid) { + if (entry->data == NULL) { + entry->data = PyMem_RawMalloc(page_size); + if (entry->data == NULL) { + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate %zu bytes for page cache entry " + "during read from PID %d at address 0x%lx", + page_size, handle->pid, addr); + return -1; + } + } + + if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size, entry->data) < 0) { + // Try to just copy the exact ammount as a fallback + PyErr_Clear(); + goto fallback; + } + + entry->page_addr = page_base; + entry->valid = 1; + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + +fallback: + // Cache full — fallback to uncached read return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); } From 6ea425828540d7a19296183c3410283897767d9a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 9 Jul 2025 04:26:41 +0200 Subject: [PATCH 820/989] gh-136229: Remove Platform Emscripten is not supported warning (#136230) Updates configure script to identify Emscripten as Tier 3. --- configure | 2 ++ configure.ac | 43 ++++++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/configure b/configure index 4a0f8959c8727d..94a0b810333ce9 100755 --- a/configure +++ b/configure @@ -7254,6 +7254,8 @@ case $host/$ac_cv_cc_name in #( PY_SUPPORT_TIER=3 ;; #( x86_64-*-linux-android/clang) : PY_SUPPORT_TIER=3 ;; #( + wasm32-*-emscripten/emcc) : + PY_SUPPORT_TIER=3 ;; #( *) : PY_SUPPORT_TIER=0 diff --git a/configure.ac b/configure.ac index 10d7a0c6056107..ade71bc011eb87 100644 --- a/configure.ac +++ b/configure.ac @@ -1208,27 +1208,28 @@ dnl NOTE: Windows support tiers are defined in PC/pyconfig.h. dnl AC_MSG_CHECKING([for PEP 11 support tier]) AS_CASE([$host/$ac_cv_cc_name], - [x86_64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=1], dnl Linux on AMD64, any vendor, glibc, gcc - [x86_64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on Intel, any version - [aarch64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on M1, any version - [i686-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 32bit Windows on Intel, MSVC - [x86_64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 64bit Windows on AMD64, MSVC - - [aarch64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux ARM64, glibc, gcc+clang - [aarch64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], - [powerpc64le-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux on PPC64 little endian, glibc, gcc - [wasm32-unknown-wasip1/clang], [PY_SUPPORT_TIER=2], dnl WebAssembly System Interface preview1, clang - [x86_64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], dnl Linux on AMD64, any vendor, glibc, clang - - [aarch64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=3], dnl Windows ARM64, MSVC - [armv7l-*-linux-gnueabihf/gcc], [PY_SUPPORT_TIER=3], dnl ARMv7 LE with hardware floats, any vendor, glibc, gcc - [powerpc64le-*-linux-gnu/clang], [PY_SUPPORT_TIER=3], dnl Linux on PPC64 little endian, glibc, clang - [s390x-*-linux-gnu/gcc], [PY_SUPPORT_TIER=3], dnl Linux on 64bit s390x (big endian), glibc, gcc - [x86_64-*-freebsd*/clang], [PY_SUPPORT_TIER=3], dnl FreeBSD on AMD64 - [aarch64-apple-ios*-simulator/clang], [PY_SUPPORT_TIER=3], dnl iOS Simulator on arm64 - [aarch64-apple-ios*/clang], [PY_SUPPORT_TIER=3], dnl iOS on ARM64 - [aarch64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on ARM64 - [x86_64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on AMD64 + [x86_64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=1], dnl Linux on AMD64, any vendor, glibc, gcc + [x86_64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on Intel, any version + [aarch64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on M1, any version + [i686-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 32bit Windows on Intel, MSVC + [x86_64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 64bit Windows on AMD64, MSVC + + [aarch64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux ARM64, glibc, gcc+clang + [aarch64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], + [powerpc64le-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux on PPC64 little endian, glibc, gcc + [wasm32-unknown-wasip1/clang], [PY_SUPPORT_TIER=2], dnl WebAssembly System Interface preview1, clang + [x86_64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], dnl Linux on AMD64, any vendor, glibc, clang + + [aarch64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=3], dnl Windows ARM64, MSVC + [armv7l-*-linux-gnueabihf/gcc], [PY_SUPPORT_TIER=3], dnl ARMv7 LE with hardware floats, any vendor, glibc, gcc + [powerpc64le-*-linux-gnu/clang], [PY_SUPPORT_TIER=3], dnl Linux on PPC64 little endian, glibc, clang + [s390x-*-linux-gnu/gcc], [PY_SUPPORT_TIER=3], dnl Linux on 64bit s390x (big endian), glibc, gcc + [x86_64-*-freebsd*/clang], [PY_SUPPORT_TIER=3], dnl FreeBSD on AMD64 + [aarch64-apple-ios*-simulator/clang], [PY_SUPPORT_TIER=3], dnl iOS Simulator on arm64 + [aarch64-apple-ios*/clang], [PY_SUPPORT_TIER=3], dnl iOS on ARM64 + [aarch64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on ARM64 + [x86_64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on AMD64 + [wasm32-*-emscripten/emcc], [PY_SUPPORT_TIER=3], dnl Emscripten [PY_SUPPORT_TIER=0] ) From f9932f542edd39c7e0eebd0f4468095c78db9651 Mon Sep 17 00:00:00 2001 From: dgpb <3577712+dg-pb@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:25:45 +0300 Subject: [PATCH 821/989] gh-119109: improve `functools.partial` vectorcall with keywords (#124584) Co-authored-by: Kumar Aditya Co-authored-by: Serhiy Storchaka --- ...-10-17-01-12-22.gh-issue-119109.u4hcvb.rst | 1 + Modules/_functoolsmodule.c | 192 +++++++++++++----- 2 files changed, 140 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-17-01-12-22.gh-issue-119109.u4hcvb.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-17-01-12-22.gh-issue-119109.u4hcvb.rst b/Misc/NEWS.d/next/Library/2024-10-17-01-12-22.gh-issue-119109.u4hcvb.rst new file mode 100644 index 00000000000000..782b2eb8d74c8e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-17-01-12-22.gh-issue-119109.u4hcvb.rst @@ -0,0 +1 @@ +:func:`functools.partial` calls are now faster when keyword arguments are used. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index d3dabd58b89c70..1c888295cb07f1 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -367,19 +367,6 @@ partial_descr_get(PyObject *self, PyObject *obj, PyObject *type) return PyMethod_New(self, obj); } -/* Merging keyword arguments using the vectorcall convention is messy, so - * if we would need to do that, we stop using vectorcall and fall back - * to using partial_call() instead. */ -Py_NO_INLINE static PyObject * -partial_vectorcall_fallback(PyThreadState *tstate, partialobject *pto, - PyObject *const *args, size_t nargsf, - PyObject *kwnames) -{ - pto->vectorcall = NULL; - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - return _PyObject_MakeTpCall(tstate, (PyObject *)pto, args, nargs, kwnames); -} - static PyObject * partial_vectorcall(PyObject *self, PyObject *const *args, size_t nargsf, PyObject *kwnames) @@ -388,10 +375,7 @@ partial_vectorcall(PyObject *self, PyObject *const *args, PyThreadState *tstate = _PyThreadState_GET(); Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - /* pto->kw is mutable, so need to check every time */ - if (PyDict_GET_SIZE(pto->kw)) { - return partial_vectorcall_fallback(tstate, pto, args, nargsf, kwnames); - } + /* Placeholder check */ Py_ssize_t pto_phcount = pto->phcount; if (nargs < pto_phcount) { PyErr_Format(PyExc_TypeError, @@ -400,50 +384,143 @@ partial_vectorcall(PyObject *self, PyObject *const *args, return NULL; } - Py_ssize_t nargskw = nargs; - if (kwnames != NULL) { - nargskw += PyTuple_GET_SIZE(kwnames); - } - PyObject **pto_args = _PyTuple_ITEMS(pto->args); Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args); + Py_ssize_t pto_nkwds = PyDict_GET_SIZE(pto->kw); + Py_ssize_t nkwds = kwnames == NULL ? 0 : PyTuple_GET_SIZE(kwnames); + Py_ssize_t nargskw = nargs + nkwds; + + /* Special cases */ + if (!pto_nkwds) { + /* Fast path if we're called without arguments */ + if (nargskw == 0) { + return _PyObject_VectorcallTstate(tstate, pto->fn, pto_args, + pto_nargs, NULL); + } - /* Fast path if we're called without arguments */ - if (nargskw == 0) { - return _PyObject_VectorcallTstate(tstate, pto->fn, - pto_args, pto_nargs, NULL); + /* Use PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single + * positional argument. */ + if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) { + PyObject **newargs = (PyObject **)args - 1; + PyObject *tmp = newargs[0]; + newargs[0] = pto_args[0]; + PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, newargs, + nargs + 1, kwnames); + newargs[0] = tmp; + return ret; + } } - /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single - * positional argument */ - if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) { - PyObject **newargs = (PyObject **)args - 1; - PyObject *tmp = newargs[0]; - newargs[0] = pto_args[0]; - PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, - newargs, nargs + 1, kwnames); - newargs[0] = tmp; - return ret; - } + /* Total sizes */ + Py_ssize_t tot_nargs = pto_nargs + nargs - pto_phcount; + Py_ssize_t tot_nkwds = pto_nkwds + nkwds; + Py_ssize_t tot_nargskw = tot_nargs + tot_nkwds; - PyObject *small_stack[_PY_FASTCALL_SMALL_STACK]; - PyObject **stack; + PyObject *pto_kw_merged = NULL; // pto_kw with duplicates merged (if any) + PyObject *tot_kwnames; - Py_ssize_t tot_nargskw = pto_nargs + nargskw - pto_phcount; - if (tot_nargskw <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) { + /* Allocate Stack + * Note, _PY_FASTCALL_SMALL_STACK is optimal for positional only + * This case might have keyword arguments + * furthermore, it might use extra stack space for temporary key storage + * thus, double small_stack size is used, which is 10 * 8 = 80 bytes */ + PyObject *small_stack[_PY_FASTCALL_SMALL_STACK * 2]; + PyObject **tmp_stack, **stack; + Py_ssize_t init_stack_size = tot_nargskw; + if (pto_nkwds) { + // If pto_nkwds, allocate additional space for temporary new keys + init_stack_size += nkwds; + } + if (init_stack_size <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) { stack = small_stack; } else { - stack = PyMem_Malloc(tot_nargskw * sizeof(PyObject *)); + stack = PyMem_Malloc(init_stack_size * sizeof(PyObject *)); if (stack == NULL) { - PyErr_NoMemory(); - return NULL; + return PyErr_NoMemory(); } } - Py_ssize_t tot_nargs; + /* Copy keywords to stack */ + if (!pto_nkwds) { + tot_kwnames = kwnames; + if (nkwds) { + /* if !pto_nkwds & nkwds, then simply append kw */ + memcpy(stack + tot_nargs, args + nargs, nkwds * sizeof(PyObject*)); + } + } + else { + /* stack is now [, , , ] + * Will resize later to [, ] */ + PyObject *key, *val; + + /* Merge kw to pto_kw or add to tail (if not duplicate) */ + Py_ssize_t n_tail = 0; + for (Py_ssize_t i = 0; i < nkwds; ++i) { + key = PyTuple_GET_ITEM(kwnames, i); + val = args[nargs + i]; + if (PyDict_Contains(pto->kw, key)) { + if (pto_kw_merged == NULL) { + pto_kw_merged = PyDict_Copy(pto->kw); + if (pto_kw_merged == NULL) { + goto error; + } + } + if (PyDict_SetItem(pto_kw_merged, key, val) < 0) { + Py_DECREF(pto_kw_merged); + goto error; + } + } + else { + /* Copy keyword tail to stack */ + stack[tot_nargs + pto_nkwds + n_tail] = val; + stack[tot_nargskw + n_tail] = key; + n_tail++; + } + } + Py_ssize_t n_merges = nkwds - n_tail; + + /* Create total kwnames */ + tot_kwnames = PyTuple_New(tot_nkwds - n_merges); + if (tot_kwnames == NULL) { + Py_XDECREF(pto_kw_merged); + goto error; + } + for (Py_ssize_t i = 0; i < n_tail; ++i) { + key = Py_NewRef(stack[tot_nargskw + i]); + PyTuple_SET_ITEM(tot_kwnames, pto_nkwds + i, key); + } + + /* Copy pto_keywords with overlapping call keywords merged + * Note, tail is already coppied. */ + Py_ssize_t pos = 0, i = 0; + while (PyDict_Next(n_merges ? pto_kw_merged : pto->kw, &pos, &key, &val)) { + assert(i < pto_nkwds); + PyTuple_SET_ITEM(tot_kwnames, i, Py_NewRef(key)); + stack[tot_nargs + i] = val; + i++; + } + assert(i == pto_nkwds); + Py_XDECREF(pto_kw_merged); + + /* Resize Stack if the removing overallocation saves some noticable memory + * NOTE: This whole block can be removed without breaking anything */ + Py_ssize_t noveralloc = n_merges + nkwds; + if (stack != small_stack && noveralloc > 6 && noveralloc > init_stack_size / 10) { + tmp_stack = PyMem_Realloc(stack, (tot_nargskw - n_merges) * sizeof(PyObject *)); + if (tmp_stack == NULL) { + Py_DECREF(tot_kwnames); + if (stack != small_stack) { + PyMem_Free(stack); + } + return PyErr_NoMemory(); + } + stack = tmp_stack; + } + } + + /* Copy Positionals to stack */ if (pto_phcount) { - tot_nargs = pto_nargs + nargs - pto_phcount; Py_ssize_t j = 0; // New args index for (Py_ssize_t i = 0; i < pto_nargs; i++) { if (pto_args[i] == pto->placeholder) { @@ -455,22 +532,31 @@ partial_vectorcall(PyObject *self, PyObject *const *args, } } assert(j == pto_phcount); - if (nargskw > pto_phcount) { - memcpy(stack + pto_nargs, args + j, (nargskw - j) * sizeof(PyObject*)); + /* Add remaining args from new_args */ + if (nargs > pto_phcount) { + memcpy(stack + pto_nargs, args + j, (nargs - j) * sizeof(PyObject*)); } } else { - tot_nargs = pto_nargs + nargs; - /* Copy to new stack, using borrowed references */ memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*)); - memcpy(stack + pto_nargs, args, nargskw * sizeof(PyObject*)); + memcpy(stack + pto_nargs, args, nargs * sizeof(PyObject*)); } - PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, - stack, tot_nargs, kwnames); + + PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, stack, + tot_nargs, tot_kwnames); if (stack != small_stack) { PyMem_Free(stack); } + if (pto_nkwds) { + Py_DECREF(tot_kwnames); + } return ret; + + error: + if (stack != small_stack) { + PyMem_Free(stack); + } + return NULL; } /* Set pto->vectorcall depending on the parameters of the partial object */ From 301b29dd306030ad4b02909d822c49cc8e05ca7d Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Wed, 9 Jul 2025 08:30:56 +0100 Subject: [PATCH 822/989] gh-94503: Update logging cookbook example with info on addressing log injection. (GH-136446) Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Doc/howto/logging-cookbook.rst | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index ae2697fbce30ad..52537a91df542c 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -4140,6 +4140,42 @@ The script, when run, prints something like: 2025-07-02 13:54:47,234 DEBUG fool me ... 2025-07-02 13:54:47,234 DEBUG can't get fooled again +If, on the other hand, you are concerned about `log injection +`_, you can use a +formatter which escapes newlines, as per the following example: + +.. code-block:: python + + import logging + + logger = logging.getLogger(__name__) + + class EscapingFormatter(logging.Formatter): + def format(self, record): + s = super().format(record) + return s.replace('\n', r'\n') + + if __name__ == '__main__': + h = logging.StreamHandler() + h.setFormatter(EscapingFormatter('%(asctime)s %(levelname)-9s %(message)s')) + logging.basicConfig(level=logging.DEBUG, handlers = [h]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +You can, of course, use whatever escaping scheme makes the most sense for you. +The script, when run, should produce output like this: + +.. code-block:: text + + 2025-07-09 06:47:33,783 DEBUG Single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me once ... + 2025-07-09 06:47:33,783 DEBUG Another single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me ...\ncan't get fooled again + +Escaping behaviour can't be the stdlib default , as it would break backwards +compatibility. .. patterns-to-avoid: From f1dcf3c7bf90961b8d5475154d3f28cfef0a054f Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Wed, 9 Jul 2025 09:39:55 +0200 Subject: [PATCH 823/989] gh-53243: Document `codecs.readbuffer_encode()` (#136284) Closes #53243 --- Doc/library/codecs.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index efe211cd2db6c8..0e84f18dd4d5d5 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -265,6 +265,20 @@ wider range of codecs when working with binary files: :func:`iterencode`. +.. function:: readbuffer_encode(buffer, errors=None, /) + + Return a :class:`tuple` containing the raw bytes of *buffer*, a + :ref:`buffer-compatible object ` or :class:`str` + (encoded to UTF-8 before processing), and their length in bytes. + + The *errors* argument is ignored. + + .. code-block:: pycon + + >>> codecs.readbuffer_encode(b"Zito") + (b'Zito', 4) + + The module also provides the following constants which are useful for reading and writing to platform dependent files: From 797abd1f7fdeb744bf9f683ef844e7279aad3d72 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 9 Jul 2025 00:55:46 -0700 Subject: [PATCH 824/989] gh-134657: Remove newly added private names from asyncio.__all__ (#134665) --- Lib/asyncio/__init__.py | 29 ++++++++++------ Lib/asyncio/events.py | 8 ++--- Lib/asyncio/unix_events.py | 1 - Lib/test/libregrtest/save_env.py | 2 +- Lib/test/test_asyncgen.py | 2 +- Lib/test/test_asyncio/test_base_events.py | 2 +- Lib/test/test_asyncio/test_buffered_proto.py | 2 +- Lib/test/test_asyncio/test_context.py | 2 +- .../test_asyncio/test_eager_task_factory.py | 2 +- Lib/test/test_asyncio/test_events.py | 34 +++++++++---------- Lib/test/test_asyncio/test_free_threading.py | 2 +- Lib/test/test_asyncio/test_futures.py | 2 +- Lib/test/test_asyncio/test_futures2.py | 2 +- Lib/test/test_asyncio/test_graph.py | 2 +- Lib/test/test_asyncio/test_locks.py | 2 +- Lib/test/test_asyncio/test_pep492.py | 2 +- Lib/test/test_asyncio/test_proactor_events.py | 2 +- Lib/test/test_asyncio/test_protocols.py | 2 +- Lib/test/test_asyncio/test_queues.py | 2 +- Lib/test/test_asyncio/test_runners.py | 16 ++++----- Lib/test/test_asyncio/test_selector_events.py | 2 +- Lib/test/test_asyncio/test_sendfile.py | 2 +- Lib/test/test_asyncio/test_server.py | 2 +- Lib/test/test_asyncio/test_sock_lowlevel.py | 2 +- Lib/test/test_asyncio/test_ssl.py | 2 +- Lib/test/test_asyncio/test_sslproto.py | 2 +- Lib/test/test_asyncio/test_staggered.py | 2 +- Lib/test/test_asyncio/test_streams.py | 2 +- Lib/test/test_asyncio/test_subprocess.py | 2 +- Lib/test/test_asyncio/test_taskgroups.py | 2 +- Lib/test/test_asyncio/test_tasks.py | 2 +- Lib/test/test_asyncio/test_threads.py | 2 +- Lib/test/test_asyncio/test_timeouts.py | 2 +- Lib/test/test_asyncio/test_transports.py | 2 +- Lib/test/test_asyncio/test_unix_events.py | 2 +- Lib/test/test_asyncio/test_waitfor.py | 2 +- Lib/test/test_asyncio/test_windows_events.py | 14 ++++---- Lib/test/test_asyncio/test_windows_utils.py | 2 +- Lib/test/test_asyncio/utils.py | 6 ++++ .../test_interpreter_pool.py | 2 +- Lib/test/test_coroutines.py | 2 +- Lib/test/test_inspect/test_inspect.py | 2 +- Lib/test/test_logging.py | 6 ++-- Lib/test/test_os.py | 2 +- Lib/test/test_unittest/test_async_case.py | 6 ++-- Lib/test/test_unittest/testmock/testasync.py | 2 +- ...-05-25-11-02-05.gh-issue-134657.3YFhR9.rst | 1 + 47 files changed, 105 insertions(+), 90 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst diff --git a/Lib/asyncio/__init__.py b/Lib/asyncio/__init__.py index 4be7112fa017d4..32a5dbae03af21 100644 --- a/Lib/asyncio/__init__.py +++ b/Lib/asyncio/__init__.py @@ -51,15 +51,24 @@ def __getattr__(name: str): import warnings - deprecated = { - "AbstractEventLoopPolicy", - "DefaultEventLoopPolicy", - "WindowsSelectorEventLoopPolicy", - "WindowsProactorEventLoopPolicy", - } - if name in deprecated: - warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) - # deprecated things have underscores in front of them - return globals()["_" + name] + match name: + case "AbstractEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return events._AbstractEventLoopPolicy + case "DefaultEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + if sys.platform == 'win32': + return windows_events._DefaultEventLoopPolicy + return unix_events._DefaultEventLoopPolicy + case "WindowsSelectorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsSelectorEventLoopPolicy + # Else fall through to the AttributeError below. + case "WindowsProactorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsProactorEventLoopPolicy + # Else fall through to the AttributeError below. raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 2913f901dca65f..a7fb55982abe9c 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -5,14 +5,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io __all__ = ( - "_AbstractEventLoopPolicy", "AbstractEventLoop", "AbstractServer", "Handle", "TimerHandle", - "_get_event_loop_policy", "get_event_loop_policy", - "_set_event_loop_policy", "set_event_loop_policy", "get_event_loop", "set_event_loop", @@ -791,7 +788,10 @@ def _init_event_loop_policy(): global _event_loop_policy with _lock: if _event_loop_policy is None: # pragma: no branch - from . import _DefaultEventLoopPolicy + if sys.platform == 'win32': + from .windows_events import _DefaultEventLoopPolicy + else: + from .unix_events import _DefaultEventLoopPolicy _event_loop_policy = _DefaultEventLoopPolicy() diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index f69c6a64c39ae6..1c1458127db5ac 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -28,7 +28,6 @@ __all__ = ( 'SelectorEventLoop', - '_DefaultEventLoopPolicy', 'EventLoop', ) diff --git a/Lib/test/libregrtest/save_env.py b/Lib/test/libregrtest/save_env.py index ffc29fa8dc686a..4cf1a075b30013 100644 --- a/Lib/test/libregrtest/save_env.py +++ b/Lib/test/libregrtest/save_env.py @@ -97,7 +97,7 @@ def get_asyncio_events__event_loop_policy(self): return support.maybe_get_event_loop_policy() def restore_asyncio_events__event_loop_policy(self, policy): asyncio = self.get_module('asyncio') - asyncio._set_event_loop_policy(policy) + asyncio.events._set_event_loop_policy(policy) def get_sys_argv(self): return id(sys.argv), sys.argv, sys.argv[:] diff --git a/Lib/test/test_asyncgen.py b/Lib/test/test_asyncgen.py index 636cb33dd9884a..cd33878d6c74ba 100644 --- a/Lib/test/test_asyncgen.py +++ b/Lib/test/test_asyncgen.py @@ -629,7 +629,7 @@ def setUp(self): def tearDown(self): self.loop.close() self.loop = None - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def check_async_iterator_anext(self, ait_class): with self.subTest(anext="pure-Python"): diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index 22ae0ef3581156..8c02de77c24740 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -29,7 +29,7 @@ class CustomError(Exception): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def mock_socket_module(): diff --git a/Lib/test/test_asyncio/test_buffered_proto.py b/Lib/test/test_asyncio/test_buffered_proto.py index 9c386dd2e63815..6d3edcc36f5c79 100644 --- a/Lib/test/test_asyncio/test_buffered_proto.py +++ b/Lib/test/test_asyncio/test_buffered_proto.py @@ -5,7 +5,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class ReceiveStuffProto(asyncio.BufferedProtocol): diff --git a/Lib/test/test_asyncio/test_context.py b/Lib/test/test_asyncio/test_context.py index ad394f44e7e5f6..f85f39839cbd79 100644 --- a/Lib/test/test_asyncio/test_context.py +++ b/Lib/test/test_asyncio/test_context.py @@ -4,7 +4,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) @unittest.skipUnless(decimal.HAVE_CONTEXTVAR, "decimal is built with a thread-local context") diff --git a/Lib/test/test_asyncio/test_eager_task_factory.py b/Lib/test/test_asyncio/test_eager_task_factory.py index 9f3b6f9acef3b6..da79ee9260a0e3 100644 --- a/Lib/test/test_asyncio/test_eager_task_factory.py +++ b/Lib/test/test_asyncio/test_eager_task_factory.py @@ -13,7 +13,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class EagerTaskFactoryLoopTests: diff --git a/Lib/test/test_asyncio/test_events.py b/Lib/test/test_asyncio/test_events.py index 873c503fa02b21..919d543b0329e9 100644 --- a/Lib/test/test_asyncio/test_events.py +++ b/Lib/test/test_asyncio/test_events.py @@ -38,7 +38,7 @@ from test.support import ALWAYS_EQ, LARGEST, SMALLEST def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def broken_unix_getsockname(): @@ -2843,13 +2843,13 @@ def test_default_event_loop_policy_deprecation(self): self.assertIsInstance(policy, asyncio.DefaultEventLoopPolicy) def test_event_loop_policy(self): - policy = asyncio._AbstractEventLoopPolicy() + policy = asyncio.events._AbstractEventLoopPolicy() self.assertRaises(NotImplementedError, policy.get_event_loop) self.assertRaises(NotImplementedError, policy.set_event_loop, object()) self.assertRaises(NotImplementedError, policy.new_event_loop) def test_get_event_loop(self): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() self.assertIsNone(policy._local._loop) with self.assertRaises(RuntimeError): @@ -2857,7 +2857,7 @@ def test_get_event_loop(self): self.assertIsNone(policy._local._loop) def test_get_event_loop_does_not_call_set_event_loop(self): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() with mock.patch.object( policy, "set_event_loop", @@ -2869,7 +2869,7 @@ def test_get_event_loop_does_not_call_set_event_loop(self): m_set_event_loop.assert_not_called() def test_get_event_loop_after_set_none(self): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() policy.set_event_loop(None) self.assertRaises(RuntimeError, policy.get_event_loop) @@ -2877,7 +2877,7 @@ def test_get_event_loop_after_set_none(self): def test_get_event_loop_thread(self, m_current_thread): def f(): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() self.assertRaises(RuntimeError, policy.get_event_loop) th = threading.Thread(target=f) @@ -2885,14 +2885,14 @@ def f(): th.join() def test_new_event_loop(self): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() loop = policy.new_event_loop() self.assertIsInstance(loop, asyncio.AbstractEventLoop) loop.close() def test_set_event_loop(self): - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() old_loop = policy.new_event_loop() policy.set_event_loop(old_loop) @@ -2909,7 +2909,7 @@ def test_get_event_loop_policy(self): with self.assertWarnsRegex( DeprecationWarning, "'asyncio.get_event_loop_policy' is deprecated"): policy = asyncio.get_event_loop_policy() - self.assertIsInstance(policy, asyncio._AbstractEventLoopPolicy) + self.assertIsInstance(policy, asyncio.events._AbstractEventLoopPolicy) self.assertIs(policy, asyncio.get_event_loop_policy()) def test_set_event_loop_policy(self): @@ -2922,7 +2922,7 @@ def test_set_event_loop_policy(self): DeprecationWarning, "'asyncio.get_event_loop_policy' is deprecated"): old_policy = asyncio.get_event_loop_policy() - policy = asyncio._DefaultEventLoopPolicy() + policy = test_utils.DefaultEventLoopPolicy() with self.assertWarnsRegex( DeprecationWarning, "'asyncio.set_event_loop_policy' is deprecated"): asyncio.set_event_loop_policy(policy) @@ -3034,13 +3034,13 @@ def test_get_event_loop_returns_running_loop(self): class TestError(Exception): pass - class Policy(asyncio._DefaultEventLoopPolicy): + class Policy(test_utils.DefaultEventLoopPolicy): def get_event_loop(self): raise TestError - old_policy = asyncio._get_event_loop_policy() + old_policy = asyncio.events._get_event_loop_policy() try: - asyncio._set_event_loop_policy(Policy()) + asyncio.events._set_event_loop_policy(Policy()) loop = asyncio.new_event_loop() with self.assertRaises(TestError): @@ -3068,7 +3068,7 @@ async def func(): asyncio.get_event_loop() finally: - asyncio._set_event_loop_policy(old_policy) + asyncio.events._set_event_loop_policy(old_policy) if loop is not None: loop.close() @@ -3078,9 +3078,9 @@ async def func(): self.assertIs(asyncio._get_running_loop(), None) def test_get_event_loop_returns_running_loop2(self): - old_policy = asyncio._get_event_loop_policy() + old_policy = asyncio.events._get_event_loop_policy() try: - asyncio._set_event_loop_policy(asyncio._DefaultEventLoopPolicy()) + asyncio.events._set_event_loop_policy(test_utils.DefaultEventLoopPolicy()) loop = asyncio.new_event_loop() self.addCleanup(loop.close) @@ -3106,7 +3106,7 @@ async def func(): asyncio.get_event_loop() finally: - asyncio._set_event_loop_policy(old_policy) + asyncio.events._set_event_loop_policy(old_policy) if loop is not None: loop.close() diff --git a/Lib/test/test_asyncio/test_free_threading.py b/Lib/test/test_asyncio/test_free_threading.py index 110996c348554c..d874ed00bd7e7a 100644 --- a/Lib/test/test_asyncio/test_free_threading.py +++ b/Lib/test/test_asyncio/test_free_threading.py @@ -15,7 +15,7 @@ class MyException(Exception): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestFreeThreading: diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 39bef465bdb717..666f9c9ee18783 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -17,7 +17,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def _fakefunc(f): diff --git a/Lib/test/test_asyncio/test_futures2.py b/Lib/test/test_asyncio/test_futures2.py index e2cddea01ecd93..c7c0ebdac1b676 100644 --- a/Lib/test/test_asyncio/test_futures2.py +++ b/Lib/test/test_asyncio/test_futures2.py @@ -7,7 +7,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class FutureTests: diff --git a/Lib/test/test_asyncio/test_graph.py b/Lib/test/test_asyncio/test_graph.py index 62f6593c31d2d1..2f22fbccba42bc 100644 --- a/Lib/test/test_asyncio/test_graph.py +++ b/Lib/test/test_asyncio/test_graph.py @@ -5,7 +5,7 @@ # To prevent a warning "test altered the execution environment" def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def capture_test_stack(*, fut=None, depth=1): diff --git a/Lib/test/test_asyncio/test_locks.py b/Lib/test/test_asyncio/test_locks.py index 047f03cbb14b56..e025d2990a3f8a 100644 --- a/Lib/test/test_asyncio/test_locks.py +++ b/Lib/test/test_asyncio/test_locks.py @@ -20,7 +20,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class LockTests(unittest.IsolatedAsyncioTestCase): diff --git a/Lib/test/test_asyncio/test_pep492.py b/Lib/test/test_asyncio/test_pep492.py index 48f4a75e0fd56c..a0c8434c9457d2 100644 --- a/Lib/test/test_asyncio/test_pep492.py +++ b/Lib/test/test_asyncio/test_pep492.py @@ -11,7 +11,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) # Test that asyncio.iscoroutine() uses collections.abc.Coroutine diff --git a/Lib/test/test_asyncio/test_proactor_events.py b/Lib/test/test_asyncio/test_proactor_events.py index 24c4e8546b17aa..b25daaface0807 100644 --- a/Lib/test/test_asyncio/test_proactor_events.py +++ b/Lib/test/test_asyncio/test_proactor_events.py @@ -18,7 +18,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def close_transport(transport): diff --git a/Lib/test/test_asyncio/test_protocols.py b/Lib/test/test_asyncio/test_protocols.py index 4484a031988533..29d3bd22705c6a 100644 --- a/Lib/test/test_asyncio/test_protocols.py +++ b/Lib/test/test_asyncio/test_protocols.py @@ -7,7 +7,7 @@ def tearDownModule(): # not needed for the test file but added for uniformness with all other # asyncio test files for the sake of unified cleanup - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class ProtocolsAbsTests(unittest.TestCase): diff --git a/Lib/test/test_asyncio/test_queues.py b/Lib/test/test_asyncio/test_queues.py index 090b9774c2289f..54bbe79f81ff69 100644 --- a/Lib/test/test_asyncio/test_queues.py +++ b/Lib/test/test_asyncio/test_queues.py @@ -6,7 +6,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class QueueBasicTests(unittest.IsolatedAsyncioTestCase): diff --git a/Lib/test/test_asyncio/test_runners.py b/Lib/test/test_asyncio/test_runners.py index 21f277bc2d8d5f..8a4d7f5c796661 100644 --- a/Lib/test/test_asyncio/test_runners.py +++ b/Lib/test/test_asyncio/test_runners.py @@ -12,14 +12,14 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def interrupt_self(): _thread.interrupt_main() -class TestPolicy(asyncio._AbstractEventLoopPolicy): +class TestPolicy(asyncio.events._AbstractEventLoopPolicy): def __init__(self, loop_factory): self.loop_factory = loop_factory @@ -61,15 +61,15 @@ def setUp(self): super().setUp() policy = TestPolicy(self.new_loop) - asyncio._set_event_loop_policy(policy) + asyncio.events._set_event_loop_policy(policy) def tearDown(self): - policy = asyncio._get_event_loop_policy() + policy = asyncio.events._get_event_loop_policy() if policy.loop is not None: self.assertTrue(policy.loop.is_closed()) self.assertTrue(policy.loop.shutdown_ag_run) - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) super().tearDown() @@ -208,7 +208,7 @@ async def main(): await asyncio.sleep(0) return 42 - policy = asyncio._get_event_loop_policy() + policy = asyncio.events._get_event_loop_policy() policy.set_event_loop = mock.Mock() asyncio.run(main()) self.assertTrue(policy.set_event_loop.called) @@ -259,7 +259,7 @@ def new_event_loop(): loop.set_task_factory(Task) return loop - asyncio._set_event_loop_policy(TestPolicy(new_event_loop)) + asyncio.events._set_event_loop_policy(TestPolicy(new_event_loop)) with self.assertRaises(asyncio.CancelledError): asyncio.run(main()) @@ -495,7 +495,7 @@ def test_set_event_loop_called_once(self): async def coro(): pass - policy = asyncio._get_event_loop_policy() + policy = asyncio.events._get_event_loop_policy() policy.set_event_loop = mock.Mock() runner = asyncio.Runner() runner.run(coro()) diff --git a/Lib/test/test_asyncio/test_selector_events.py b/Lib/test/test_asyncio/test_selector_events.py index aab6a779170eb9..7b6d1bce5e460f 100644 --- a/Lib/test/test_asyncio/test_selector_events.py +++ b/Lib/test/test_asyncio/test_selector_events.py @@ -24,7 +24,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestBaseSelectorEventLoop(BaseSelectorEventLoop): diff --git a/Lib/test/test_asyncio/test_sendfile.py b/Lib/test/test_asyncio/test_sendfile.py index e1b766d06cbe1e..dcd963b3355ef8 100644 --- a/Lib/test/test_asyncio/test_sendfile.py +++ b/Lib/test/test_asyncio/test_sendfile.py @@ -22,7 +22,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MySendfileProto(asyncio.Protocol): diff --git a/Lib/test/test_asyncio/test_server.py b/Lib/test/test_asyncio/test_server.py index 32211f4cba32cb..5bd0f7e2af4f84 100644 --- a/Lib/test/test_asyncio/test_server.py +++ b/Lib/test/test_asyncio/test_server.py @@ -11,7 +11,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class BaseStartServer(func_tests.FunctionalTestCaseMixin): diff --git a/Lib/test/test_asyncio/test_sock_lowlevel.py b/Lib/test/test_asyncio/test_sock_lowlevel.py index 4f7b9a1dda6b78..df4ec7948975f6 100644 --- a/Lib/test/test_asyncio/test_sock_lowlevel.py +++ b/Lib/test/test_asyncio/test_sock_lowlevel.py @@ -15,7 +15,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MyProto(asyncio.Protocol): diff --git a/Lib/test/test_asyncio/test_ssl.py b/Lib/test/test_asyncio/test_ssl.py index 3a7185cd8974d0..06118f3a61587b 100644 --- a/Lib/test/test_asyncio/test_ssl.py +++ b/Lib/test/test_asyncio/test_ssl.py @@ -30,7 +30,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MyBaseProto(asyncio.Protocol): diff --git a/Lib/test/test_asyncio/test_sslproto.py b/Lib/test/test_asyncio/test_sslproto.py index aa248c5786f634..3e304c166425b0 100644 --- a/Lib/test/test_asyncio/test_sslproto.py +++ b/Lib/test/test_asyncio/test_sslproto.py @@ -21,7 +21,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) @unittest.skipIf(ssl is None, 'No ssl module') diff --git a/Lib/test/test_asyncio/test_staggered.py b/Lib/test/test_asyncio/test_staggered.py index ad34aa6da01f54..32e4817b70d717 100644 --- a/Lib/test/test_asyncio/test_staggered.py +++ b/Lib/test/test_asyncio/test_staggered.py @@ -8,7 +8,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class StaggeredTests(unittest.IsolatedAsyncioTestCase): diff --git a/Lib/test/test_asyncio/test_streams.py b/Lib/test/test_asyncio/test_streams.py index 4fa4384346f9e9..f93ee54abc6469 100644 --- a/Lib/test/test_asyncio/test_streams.py +++ b/Lib/test/test_asyncio/test_streams.py @@ -18,7 +18,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class StreamTests(test_utils.TestCase): diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py index 341e3e979e002b..3a17c169c34f12 100644 --- a/Lib/test/test_asyncio/test_subprocess.py +++ b/Lib/test/test_asyncio/test_subprocess.py @@ -37,7 +37,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestSubprocessTransport(base_subprocess.BaseSubprocessTransport): diff --git a/Lib/test/test_asyncio/test_taskgroups.py b/Lib/test/test_asyncio/test_taskgroups.py index 0d69a436fdb840..91f6b03b4597a5 100644 --- a/Lib/test/test_asyncio/test_taskgroups.py +++ b/Lib/test/test_asyncio/test_taskgroups.py @@ -15,7 +15,7 @@ # To prevent a warning "test altered the execution environment" def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MyExc(Exception): diff --git a/Lib/test/test_asyncio/test_tasks.py b/Lib/test/test_asyncio/test_tasks.py index f6f976f213ac02..931a43816a257a 100644 --- a/Lib/test/test_asyncio/test_tasks.py +++ b/Lib/test/test_asyncio/test_tasks.py @@ -24,7 +24,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) async def coroutine_function(): diff --git a/Lib/test/test_asyncio/test_threads.py b/Lib/test/test_asyncio/test_threads.py index c98c9a9b395ff9..8ad5f9b2c9e750 100644 --- a/Lib/test/test_asyncio/test_threads.py +++ b/Lib/test/test_asyncio/test_threads.py @@ -8,7 +8,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class ToThreadTests(unittest.IsolatedAsyncioTestCase): diff --git a/Lib/test/test_asyncio/test_timeouts.py b/Lib/test/test_asyncio/test_timeouts.py index 3ba84d63b2ca5f..f60722c48b7617 100644 --- a/Lib/test/test_asyncio/test_timeouts.py +++ b/Lib/test/test_asyncio/test_timeouts.py @@ -9,7 +9,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TimeoutTests(unittest.IsolatedAsyncioTestCase): diff --git a/Lib/test/test_asyncio/test_transports.py b/Lib/test/test_asyncio/test_transports.py index af10d3dc2a80df..dbb572e2e1536b 100644 --- a/Lib/test/test_asyncio/test_transports.py +++ b/Lib/test/test_asyncio/test_transports.py @@ -10,7 +10,7 @@ def tearDownModule(): # not needed for the test file but added for uniformness with all other # asyncio test files for the sake of unified cleanup - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TransportTests(unittest.TestCase): diff --git a/Lib/test/test_asyncio/test_unix_events.py b/Lib/test/test_asyncio/test_unix_events.py index e020c1f3e4f677..22982dc9d8aefe 100644 --- a/Lib/test/test_asyncio/test_unix_events.py +++ b/Lib/test/test_asyncio/test_unix_events.py @@ -30,7 +30,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) MOCK_ANY = mock.ANY diff --git a/Lib/test/test_asyncio/test_waitfor.py b/Lib/test/test_asyncio/test_waitfor.py index d083f6b4d2a535..dedc6bf69d770e 100644 --- a/Lib/test/test_asyncio/test_waitfor.py +++ b/Lib/test/test_asyncio/test_waitfor.py @@ -5,7 +5,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) # The following value can be used as a very small timeout: diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py index 69e9905205eee0..0af3368627afca 100644 --- a/Lib/test/test_asyncio/test_windows_events.py +++ b/Lib/test/test_asyncio/test_windows_events.py @@ -19,7 +19,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class UpperProto(asyncio.Protocol): @@ -330,16 +330,16 @@ def test_selector_win_policy(self): async def main(): self.assertIsInstance(asyncio.get_running_loop(), asyncio.SelectorEventLoop) - old_policy = asyncio._get_event_loop_policy() + old_policy = asyncio.events._get_event_loop_policy() try: with self.assertWarnsRegex( DeprecationWarning, "'asyncio.WindowsSelectorEventLoopPolicy' is deprecated", ): - asyncio._set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + asyncio.events._set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) asyncio.run(main()) finally: - asyncio._set_event_loop_policy(old_policy) + asyncio.events._set_event_loop_policy(old_policy) def test_proactor_win_policy(self): async def main(): @@ -347,16 +347,16 @@ async def main(): asyncio.get_running_loop(), asyncio.ProactorEventLoop) - old_policy = asyncio._get_event_loop_policy() + old_policy = asyncio.events._get_event_loop_policy() try: with self.assertWarnsRegex( DeprecationWarning, "'asyncio.WindowsProactorEventLoopPolicy' is deprecated", ): - asyncio._set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) + asyncio.events._set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) asyncio.run(main()) finally: - asyncio._set_event_loop_policy(old_policy) + asyncio.events._set_event_loop_policy(old_policy) if __name__ == '__main__': diff --git a/Lib/test/test_asyncio/test_windows_utils.py b/Lib/test/test_asyncio/test_windows_utils.py index a6b207567c4f00..97f078ff911b5a 100644 --- a/Lib/test/test_asyncio/test_windows_utils.py +++ b/Lib/test/test_asyncio/test_windows_utils.py @@ -16,7 +16,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class PipeTests(unittest.TestCase): diff --git a/Lib/test/test_asyncio/utils.py b/Lib/test/test_asyncio/utils.py index 0a96573a81c173..a480e16e81bb91 100644 --- a/Lib/test/test_asyncio/utils.py +++ b/Lib/test/test_asyncio/utils.py @@ -601,3 +601,9 @@ def func(): await asyncio.sleep(0) if exc is not None: raise exc + + +if sys.platform == 'win32': + DefaultEventLoopPolicy = asyncio.windows_events._DefaultEventLoopPolicy +else: + DefaultEventLoopPolicy = asyncio.unix_events._DefaultEventLoopPolicy diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index b10bbebd0984c4..d5c032d01cdf5d 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -512,7 +512,7 @@ def setUpClass(cls): # tests left a policy in place, just in case. policy = support.maybe_get_event_loop_policy() assert policy is None, policy - cls.addClassCleanup(lambda: asyncio._set_event_loop_policy(None)) + cls.addClassCleanup(lambda: asyncio.events._set_event_loop_policy(None)) def setUp(self): super().setUp() diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 4755046fe1904d..42b13064befaa9 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -2307,7 +2307,7 @@ async def f(): pass finally: loop.close() - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) self.assertEqual(buffer, [1, 2, 'MyException']) diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 79eb103224b956..0ea029b977b3fc 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -2820,7 +2820,7 @@ async def asyncTearDown(self): @classmethod def tearDownClass(cls): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def _asyncgenstate(self): return inspect.getasyncgenstate(self.asyncgen) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 3819965ed2cbf9..275f7ce47d09b5 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -5421,7 +5421,7 @@ def test_taskName_with_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) @support.requires_working_socket() def test_taskName_without_asyncio_imported(self): @@ -5433,7 +5433,7 @@ def test_taskName_without_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class BasicConfigTest(unittest.TestCase): @@ -5758,7 +5758,7 @@ async def log_record(): data = f.read().strip() self.assertRegex(data, r'Task-\d+ - hello world') finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) if handler: handler.close() diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 5217037ae9d812..1e50dc43c35f5c 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -104,7 +104,7 @@ def create_file(filename, content=b'content'): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MiscTests(unittest.TestCase): diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 993e6bf013cfbf..91d45283eb3b1b 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -12,7 +12,7 @@ class MyException(Exception): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestCM: @@ -480,7 +480,7 @@ def test_setup_get_event_loop(self): class TestCase1(unittest.IsolatedAsyncioTestCase): def setUp(self): - asyncio._get_event_loop_policy().get_event_loop() + asyncio.events._get_event_loop_policy().get_event_loop() async def test_demo1(self): pass @@ -490,7 +490,7 @@ async def test_demo1(self): self.assertTrue(result.wasSuccessful()) def test_loop_factory(self): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestCase1(unittest.IsolatedAsyncioTestCase): loop_factory = asyncio.EventLoop diff --git a/Lib/test/test_unittest/testmock/testasync.py b/Lib/test/test_unittest/testmock/testasync.py index 0791675b5401ca..dc36ceeb6502e8 100644 --- a/Lib/test/test_unittest/testmock/testasync.py +++ b/Lib/test/test_unittest/testmock/testasync.py @@ -15,7 +15,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class AsyncClass: diff --git a/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst b/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst new file mode 100644 index 00000000000000..1bf8ee504ef180 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst @@ -0,0 +1 @@ +:mod:`asyncio`: Remove some private names from ``asyncio.__all__``. From 77fa7a4dcc771bf4d297ebfd4f357483d0750a1c Mon Sep 17 00:00:00 2001 From: Justin Su Date: Wed, 9 Jul 2025 04:57:20 -0400 Subject: [PATCH 825/989] gh-136447: Use `self.loop` instead of global `loop` variable in asyncio REPL (#136448) --- Lib/asyncio/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py index 21ca5c5f62ae83..ff3a69d1e17297 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -64,7 +64,7 @@ def callback(): except BaseException as exc: future.set_exception(exc) - loop.call_soon_threadsafe(callback, context=self.context) + self.loop.call_soon_threadsafe(callback, context=self.context) try: return future.result() From 3c43df4dbdb0a41b8e5503b7fa4741a29b6007e0 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Wed, 9 Jul 2025 09:52:39 -0400 Subject: [PATCH 826/989] Docs: unittest.enterModuleContext is not a classmethod (#136464) --- Doc/library/unittest.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index d526e835caa18c..ec96e8416120fa 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -2563,7 +2563,7 @@ To add cleanup code that must be run even in the case of an exception, use .. versionadded:: 3.8 -.. classmethod:: enterModuleContext(cm) +.. function:: enterModuleContext(cm) Enter the supplied :term:`context manager`. If successful, also add its :meth:`~object.__exit__` method as a cleanup function by From 6a6cd3c07c0300c8799878a48d555470be2a52f7 Mon Sep 17 00:00:00 2001 From: NekrodNIK <60639354+NekrodNIK@users.noreply.github.com> Date: Wed, 9 Jul 2025 17:06:42 +0300 Subject: [PATCH 827/989] gh-131825: Fix `sqlite3` timezone-naive adapter recipe (GH-136270) --- Doc/library/sqlite3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 641e1f1de03a1d..a14af6d3d88df2 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -2288,7 +2288,7 @@ This section shows recipes for common adapters and converters. def adapt_datetime_iso(val): """Adapt datetime.datetime to timezone-naive ISO 8601 date.""" - return val.isoformat() + return val.replace(tzinfo=None).isoformat() def adapt_datetime_epoch(val): """Adapt datetime.datetime to Unix timestamp.""" From 591abcc01fcf1c65c7fdfaca7274f5d3f9f022da Mon Sep 17 00:00:00 2001 From: Oskar Roesler Date: Wed, 9 Jul 2025 17:54:58 +0200 Subject: [PATCH 828/989] gh-81520: Document unexpected `os.path.ismount` behaviour with btrfs subvolumes (GH-136058) --- Doc/library/os.path.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index f72aee19d8f332..1c1cf07a655ae7 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -298,9 +298,10 @@ the :mod:`glob` module.) device than *path*, or whether :file:`{path}/..` and *path* point to the same i-node on the same device --- this should detect mount points for all Unix and POSIX variants. It is not able to reliably detect bind mounts on the - same filesystem. On Windows, a drive letter root and a share UNC are - always mount points, and for any other path ``GetVolumePathName`` is called - to see if it is different from the input path. + same filesystem. On Linux systems, it will always return ``True`` for btrfs + subvolumes, even if they aren't mount points. On Windows, a drive letter root + and a share UNC are always mount points, and for any other path + ``GetVolumePathName`` is called to see if it is different from the input path. .. versionchanged:: 3.4 Added support for detecting non-root mount points on Windows. From 798f791dafb9cc5109ac19b086e98cfa6b943cfd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 9 Jul 2025 10:23:46 -0700 Subject: [PATCH 829/989] Minor edit: Move comments closer to the code they describe (gh-136477) --- Lib/random.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index 86d562f0b8aaf6..c89cbb755abac8 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -844,8 +844,8 @@ def binomialvariate(self, n=1, p=0.5): # BTRS: Transformed rejection with squeeze method by Wolfgang Hörmann # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf assert n*p >= 10.0 and p <= 0.5 - setup_complete = False + setup_complete = False spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution b = 1.15 + 2.53 * spq a = -0.0873 + 0.0248 * b + 0.01 * p @@ -860,22 +860,23 @@ def binomialvariate(self, n=1, p=0.5): k = _floor((2.0 * a / us + b) * u + c) if k < 0 or k > n: continue + v = random() # The early-out "squeeze" test substantially reduces # the number of acceptance condition evaluations. - v = random() if us >= 0.07 and v <= vr: return k - # Acceptance-rejection test. - # Note, the original paper erroneously omits the call to log(v) - # when comparing to the log of the rescaled binomial distribution. if not setup_complete: alpha = (2.83 + 5.1 / b) * spq lpq = _log(p / (1.0 - p)) m = _floor((n + 1) * p) # Mode of the distribution h = _lgamma(m + 1) + _lgamma(n - m + 1) setup_complete = True # Only needs to be done once + + # Acceptance-rejection test. + # Note, the original paper erroneously omits the call to log(v) + # when comparing to the log of the rescaled binomial distribution. v *= alpha / (a / (us * us) + b) if _log(v) <= h - _lgamma(k + 1) - _lgamma(n - k + 1) + (k - m) * lpq: return k From c49dc3bd0f0ac57a038e9ba63e83cfa424e91b2a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 9 Jul 2025 12:11:28 -0700 Subject: [PATCH 830/989] GH-115802: Optimize JIT stencils for size (GH-136393) --- Tools/jit/_targets.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index ed10329d25d2f9..728f48128ce79c 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -137,7 +137,15 @@ async def _compile( f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", f"-I{CPYTHON / 'Python'}", f"-I{CPYTHON / 'Tools' / 'jit'}", - "-O3", + # -O2 and -O3 include some optimizations that make sense for + # standalone functions, but not for snippets of code that are going + # to be laid out end-to-end (like ours)... common examples include + # passes like tail-duplication, or aligning jump targets with nops. + # -Os is equivalent to -O2 with many of these problematic passes + # disabled. Based on manual review, for *our* purposes it usually + # generates better code than -O2 (and -O2 usually generates better + # code than -O3). As a nice benefit, it uses less memory too: + "-Os", "-S", # Shorten full absolute file paths in the generated code (like the # __FILE__ macro and assert failure messages) for reproducibility: From e697f5e7c05c48d98188b4d144a71f6d3a6e724f Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 9 Jul 2025 21:18:06 +0100 Subject: [PATCH 831/989] Add Diego Russo as code owner of the JIT (#136460) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08d7a80d7726d3..98efdb65146374 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -26,7 +26,7 @@ Modules/Setup* @erlend-aasland **/*context* @1st1 **/*genobject* @markshannon **/*hamt* @1st1 -**/*jit* @brandtbucher @savannahostrowski +**/*jit* @brandtbucher @savannahostrowski @diegorusso Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra From 92f392ad9e5e10ff98eac319e58ec79df5951ce0 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Wed, 9 Jul 2025 16:33:45 -0500 Subject: [PATCH 832/989] gh-136145: Define 'standard library' and 'stdlib' in the glossary (GH-136146) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Éric --- Doc/glossary.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index c5c7994f1262a9..705b0a9279c6d4 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -1280,6 +1280,16 @@ Glossary and ending with double underscores. Special methods are documented in :ref:`specialnames`. + standard library + The collection of :term:`packages `, :term:`modules ` + and :term:`extension modules ` distributed as a part + of the official Python interpreter package. The exact membership of the + collection may vary based on platform, available system libraries, or + other criteria. Documentation can be found at :ref:`library-index`. + + See also :data:`sys.stdlib_module_names` for a list of all possible + standard library module names. + statement A statement is part of a suite (a "block" of code). A statement is either an :term:`expression` or one of several constructs with a keyword, such @@ -1290,6 +1300,9 @@ Glossary issues such as incorrect types. See also :term:`type hints ` and the :mod:`typing` module. + stdlib + An abbreviation of :term:`standard library`. + strong reference In Python's C API, a strong reference is a reference to an object which is owned by the code holding the reference. The strong From 92b33c9590140263343c3e780bfc2ea1c1aded5c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Jul 2025 00:17:21 +0200 Subject: [PATCH 833/989] gh-136156: Skip test_tempfile.test_link_tmpfile() on Android (#136430) Adds a test skip on platforms where hard links are not available (which includes Android). --- Lib/test/test_tempfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index 36151b016ea35b..aeca62cf2564bc 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1594,6 +1594,7 @@ def test_unexpected_error(self): mock_close.assert_called() self.assertEqual(os.listdir(dir), []) + @os_helper.skip_unless_hardlink @unittest.skipUnless(tempfile._O_TMPFILE_WORKS, 'need os.O_TMPFILE') @unittest.skipUnless(os.path.exists('/proc/self/fd'), 'need /proc/self/fd') From 9c4d28777526e9975b212d49fb0a530f773a3209 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Jul 2025 01:06:48 +0200 Subject: [PATCH 834/989] gh-102740: Clarify time.monotonic() "system-wide" in the doc (#136431) --- Doc/library/time.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 29b695a9b193d6..df9be68bf4f69d 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -306,10 +306,11 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.5 - The function is now always available and always system-wide. + The function is now always available and the clock is now the same for + all processes. .. versionchanged:: 3.10 - On macOS, the function is now system-wide. + On macOS, the clock is now the same for all processes. .. function:: monotonic_ns() -> int @@ -325,7 +326,8 @@ Functions Return the value (in fractional seconds) of a performance counter, i.e. a clock with the highest available resolution to measure a short duration. It - does include time elapsed during sleep and is system-wide. The reference + does include time elapsed during sleep. The clock is the same for all + processes. The reference point of the returned value is undefined, so that only the difference between the results of two calls is valid. @@ -340,7 +342,7 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.10 - On Windows, the function is now system-wide. + On Windows, the clock is now the same for all processes. .. versionchanged:: 3.13 Use the same clock as :func:`time.monotonic`. @@ -987,8 +989,8 @@ The following constant is the only parameter that can be sent to .. data:: CLOCK_REALTIME - System-wide real-time clock. Setting this clock requires appropriate - privileges. + Real-time clock. Setting this clock requires appropriate privileges. + The clock is the same for all processes. .. availability:: Unix. From ea45a2f97cb1d4774a6f88e63c6ce0a487f83031 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 10 Jul 2025 00:11:17 +0100 Subject: [PATCH 835/989] gh-136476: Show the full stack in get_async_stack_trace in _remote_debugging (#136483) --- Lib/test/test_external_inspection.py | 430 +++++-- ...-07-09-20-29-30.gh-issue-136476.HyLLzh.rst | 2 + Modules/_remote_debugging_module.c | 1032 ++++++++--------- Modules/clinic/_remote_debugging_module.c.h | 47 +- 4 files changed, 868 insertions(+), 643 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 44890ebfe5f76d..a709b837161f48 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -8,7 +8,12 @@ import time from asyncio import staggered, taskgroups, base_events, tasks from unittest.mock import ANY -from test.support import os_helper, SHORT_TIMEOUT, busy_retry, requires_gil_enabled +from test.support import ( + os_helper, + SHORT_TIMEOUT, + busy_retry, + requires_gil_enabled, +) from test.support.script_helper import make_script from test.support.socket_helper import find_unused_port @@ -236,55 +241,162 @@ def new_eager_loop(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - # sets are unordered, so we want to sort "awaited_by"s - stack_trace[2].sort(key=lambda x: x[1]) + # First check all the tasks are present + tasks_names = [ + task.task_name for task in stack_trace[0].awaited_by + ] + for task_name in ["c2_root", "sub_main_1", "sub_main_2"]: + self.assertIn(task_name, tasks_names) + + # Now ensure that the awaited_by_relationships are correct + id_to_task = { + task.task_id: task for task in stack_trace[0].awaited_by + } + task_name_to_awaited_by = { + task.task_name: set( + id_to_task[awaited.task_name].task_name + for awaited in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + task_name_to_awaited_by, + { + "c2_root": {"Task-1", "sub_main_1", "sub_main_2"}, + "Task-1": set(), + "sub_main_1": {"Task-1"}, + "sub_main_2": {"Task-1"}, + }, + ) - expected_stack_trace = [ - [ - FrameInfo([script_name, 10, "c5"]), - FrameInfo([script_name, 14, "c4"]), - FrameInfo([script_name, 17, "c3"]), - FrameInfo([script_name, 20, "c2"]), - ], - "c2_root", - [ - CoroInfo( - [ - [ - FrameInfo( + # Now ensure that the coroutine stacks are correct + coroutine_stacks = { + task.task_name: sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + coroutine_stacks, + { + "Task-1": [ + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ) + ], + "c2_root": [ + ( + tuple([script_name, 10, "c5"]), + tuple([script_name, 14, "c4"]), + tuple([script_name, 17, "c3"]), + tuple([script_name, 20, "c2"]), + ) + ], + "sub_main_1": [(tuple([script_name, 23, "c1"]),)], + "sub_main_2": [(tuple([script_name, 23, "c1"]),)], + }, + ) + + # Now ensure the coroutine stacks for the awaited_by relationships are correct. + awaited_by_coroutine_stacks = { + task.task_name: sorted( + ( + id_to_task[coro.task_name].task_name, + tuple(tuple(frame) for frame in coro.call_stack), + ) + for coro in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "c2_root": [ + ( + "Task-1", + ( + tuple( [ taskgroups.__file__, ANY, "TaskGroup._aexit", ] ), - FrameInfo( + tuple( [ taskgroups.__file__, ANY, "TaskGroup.__aexit__", ] ), - FrameInfo([script_name, 26, "main"]), - ], + tuple([script_name, 26, "main"]), + ), + ), + ("sub_main_1", (tuple([script_name, 23, "c1"]),)), + ("sub_main_2", (tuple([script_name, 23, "c1"]),)), + ], + "sub_main_1": [ + ( "Task-1", - ] - ), - CoroInfo( - [ - [FrameInfo([script_name, 23, "c1"])], - "sub_main_1", - ] - ), - CoroInfo( - [ - [FrameInfo([script_name, 23, "c1"])], - "sub_main_2", - ] - ), - ], - ] - self.assertEqual(stack_trace, expected_stack_trace) + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ), + ) + ], + "sub_main_2": [ + ( + "Task-1", + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ), + ) + ], + }, + ) @skip_if_not_supported @unittest.skipIf( @@ -350,19 +462,29 @@ async def main(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - # sets are unordered, so we want to sort "awaited_by"s - stack_trace[2].sort(key=lambda x: x[1]) - - expected_stack_trace = [ + # For this simple asyncgen test, we only expect one task with the full coroutine stack + self.assertEqual(len(stack_trace[0].awaited_by), 1) + task = stack_trace[0].awaited_by[0] + self.assertEqual(task.task_name, "Task-1") + + # Check the coroutine stack - based on actual output, only shows main + coroutine_stack = sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + self.assertEqual( + coroutine_stack, [ - FrameInfo([script_name, 10, "gen_nested_call"]), - FrameInfo([script_name, 16, "gen"]), - FrameInfo([script_name, 19, "main"]), + ( + tuple([script_name, 10, "gen_nested_call"]), + tuple([script_name, 16, "gen"]), + tuple([script_name, 19, "main"]), + ) ], - "Task-1", - [], - ] - self.assertEqual(stack_trace, expected_stack_trace) + ) + + # No awaited_by relationships expected for this simple case + self.assertEqual(task.awaited_by, []) @skip_if_not_supported @unittest.skipIf( @@ -429,18 +551,73 @@ async def main(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - # sets are unordered, so we want to sort "awaited_by"s - stack_trace[2].sort(key=lambda x: x[1]) - - expected_stack_trace = [ - [ - FrameInfo([script_name, 11, "deep"]), - FrameInfo([script_name, 15, "c1"]), - ], - "Task-2", - [CoroInfo([[FrameInfo([script_name, 21, "main"])], "Task-1"])], + # First check all the tasks are present + tasks_names = [ + task.task_name for task in stack_trace[0].awaited_by ] - self.assertEqual(stack_trace, expected_stack_trace) + for task_name in ["Task-1", "Task-2"]: + self.assertIn(task_name, tasks_names) + + # Now ensure that the awaited_by_relationships are correct + id_to_task = { + task.task_id: task for task in stack_trace[0].awaited_by + } + task_name_to_awaited_by = { + task.task_name: set( + id_to_task[awaited.task_name].task_name + for awaited in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + task_name_to_awaited_by, + { + "Task-1": set(), + "Task-2": {"Task-1"}, + }, + ) + + # Now ensure that the coroutine stacks are correct + coroutine_stacks = { + task.task_name: sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + coroutine_stacks, + { + "Task-1": [(tuple([script_name, 21, "main"]),)], + "Task-2": [ + ( + tuple([script_name, 11, "deep"]), + tuple([script_name, 15, "c1"]), + ) + ], + }, + ) + + # Now ensure the coroutine stacks for the awaited_by relationships are correct. + awaited_by_coroutine_stacks = { + task.task_name: sorted( + ( + id_to_task[coro.task_name].task_name, + tuple(tuple(frame) for frame in coro.call_stack), + ) + for coro in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "Task-2": [ + ("Task-1", (tuple([script_name, 21, "main"]),)) + ], + }, + ) @skip_if_not_supported @unittest.skipIf( @@ -510,36 +687,93 @@ async def main(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - # sets are unordered, so we want to sort "awaited_by"s - stack_trace[2].sort(key=lambda x: x[1]) - expected_stack_trace = [ - [ - FrameInfo([script_name, 11, "deep"]), - FrameInfo([script_name, 15, "c1"]), - FrameInfo( - [ - staggered.__file__, - ANY, - "staggered_race..run_one_coro", - ] - ), - ], - "Task-2", - [ - CoroInfo( - [ - [ - FrameInfo( + # First check all the tasks are present + tasks_names = [ + task.task_name for task in stack_trace[0].awaited_by + ] + for task_name in ["Task-1", "Task-2"]: + self.assertIn(task_name, tasks_names) + + # Now ensure that the awaited_by_relationships are correct + id_to_task = { + task.task_id: task for task in stack_trace[0].awaited_by + } + task_name_to_awaited_by = { + task.task_name: set( + id_to_task[awaited.task_name].task_name + for awaited in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + task_name_to_awaited_by, + { + "Task-1": set(), + "Task-2": {"Task-1"}, + }, + ) + + # Now ensure that the coroutine stacks are correct + coroutine_stacks = { + task.task_name: sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + coroutine_stacks, + { + "Task-1": [ + ( + tuple([staggered.__file__, ANY, "staggered_race"]), + tuple([script_name, 21, "main"]), + ) + ], + "Task-2": [ + ( + tuple([script_name, 11, "deep"]), + tuple([script_name, 15, "c1"]), + tuple( + [ + staggered.__file__, + ANY, + "staggered_race..run_one_coro", + ] + ), + ) + ], + }, + ) + + # Now ensure the coroutine stacks for the awaited_by relationships are correct. + awaited_by_coroutine_stacks = { + task.task_name: sorted( + ( + id_to_task[coro.task_name].task_name, + tuple(tuple(frame) for frame in coro.call_stack), + ) + for coro in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "Task-2": [ + ( + "Task-1", + ( + tuple( [staggered.__file__, ANY, "staggered_race"] ), - FrameInfo([script_name, 21, "main"]), - ], - "Task-1", - ] - ) - ], - ] - self.assertEqual(stack_trace, expected_stack_trace) + tuple([script_name, 21, "main"]), + ), + ) + ], + }, + ) @skip_if_not_supported @unittest.skipIf( @@ -973,7 +1207,10 @@ def main_work(): if not stack: continue current_frame = stack[0] - if current_frame.funcname == "main_work" and current_frame.lineno >15: + if ( + current_frame.funcname == "main_work" + and current_frame.lineno > 15 + ): found = True if found: @@ -981,7 +1218,9 @@ def main_work(): # Give a bit of time to take the next sample time.sleep(0.1) else: - self.fail("Main thread did not start its busy work on time") + self.fail( + "Main thread did not start its busy work on time" + ) # Get stack trace with only GIL holder unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True) @@ -999,16 +1238,23 @@ def main_work(): p.wait(timeout=SHORT_TIMEOUT) # Verify we got multiple threads in all_traces - self.assertGreater(len(all_traces), 1, "Should have multiple threads") + self.assertGreater( + len(all_traces), 1, "Should have multiple threads" + ) # Verify we got exactly one thread in gil_traces - self.assertEqual(len(gil_traces), 1, "Should have exactly one GIL holder") + self.assertEqual( + len(gil_traces), 1, "Should have exactly one GIL holder" + ) # The GIL holder should be in the all_traces list gil_thread_id = gil_traces[0][0] all_thread_ids = [trace[0] for trace in all_traces] - self.assertIn(gil_thread_id, all_thread_ids, - "GIL holder should be among all threads") + self.assertIn( + gil_thread_id, + all_thread_ids, + "GIL holder should be among all threads", + ) if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst b/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst new file mode 100644 index 00000000000000..7634bd3be9346d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst @@ -0,0 +1,2 @@ +Fix a bug that was causing the ``get_async_stack_trace`` function to miss +some frames in the stack trace. diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index b2d4d1ae650814..178c6d11357e5e 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -257,20 +257,25 @@ is_frame_valid( uintptr_t code_object_addr ); -static int -parse_tasks_in_set( +typedef int (*thread_processor_func)( RemoteUnwinderObject *unwinder, - uintptr_t set_addr, - PyObject *awaited_by, - int recurse_task + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +typedef int (*set_entry_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context ); + static int parse_task( RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *render_to, - int recurse_task + PyObject *render_to ); static int @@ -285,9 +290,27 @@ static int parse_frame_object( RemoteUnwinderObject *unwinder, PyObject** result, uintptr_t address, + uintptr_t* address_of_code_object, uintptr_t* previous_frame ); +static int +parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +); + +static int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr); +static int read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size); + +static int process_task_and_waiters(RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result); +static int process_task_awaited_by(RemoteUnwinderObject *unwinder, uintptr_t task_address, set_entry_processor_func processor, void *context); +static int find_running_task_in_thread(RemoteUnwinderObject *unwinder, uintptr_t thread_state_addr, uintptr_t *running_task_addr); +static int get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr); +static int append_awaited_by(RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, PyObject *result); + /* ============================================================================ * UTILITY FUNCTIONS AND HELPERS * ============================================================================ */ @@ -405,32 +428,149 @@ validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) return 0; } -/* ============================================================================ - * MEMORY READING FUNCTIONS - * ============================================================================ */ +// Generic function to iterate through all threads +static int +iterate_threads( + RemoteUnwinderObject *unwinder, + thread_processor_func processor, + void *context +) { + uintptr_t thread_state_addr; + unsigned long tid = 0; -static inline int -read_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) -{ - int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(void*), ptr_addr); - if (result < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read pointer from remote memory"); + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->interpreter_addr + unwinder->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state"); return -1; } + + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + unwinder->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID"); + return -1; + } + + // Call the processor function for this thread + if (processor(unwinder, thread_state_addr, tid, context) < 0) { + return -1; + } + + // Move to next thread + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + unwinder->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state"); + return -1; + } + } + return 0; } -static inline int -read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size) -{ - int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(Py_ssize_t), size); - if (result < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Py_ssize_t from remote memory"); +// Generic function to iterate through set entries +static int +iterate_set_entries( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + set_entry_processor_func processor, + void *context +) { + char set_object[SIZEOF_SET_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, set_addr, + SIZEOF_SET_OBJ, set_object) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); return -1; } + + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + + Py_ssize_t i = 0; + Py_ssize_t els = 0; + while (i < set_len && els < num_els) { + uintptr_t key_addr; + if (read_py_ptr(unwinder, table_ptr, &key_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); + return -1; + } + + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry ref count"); + return -1; + } + + if (ref_cnt) { + // Process this valid set entry + if (processor(unwinder, key_addr, context) < 0) { + return -1; + } + els++; + } + } + table_ptr += sizeof(void*) * 2; + i++; + } + return 0; } +// Processor function for task waiters +static int +process_waiter_task( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *result = (PyObject *)context; + return process_task_and_waiters(unwinder, key_addr, result); +} + +// Processor function for parsing tasks in sets +static int +process_task_parser( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *awaited_by = (PyObject *)context; + return parse_task(unwinder, key_addr, awaited_by); +} + +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ + +#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ +static inline int \ +read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ +{ \ + int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ + if (res < 0) { \ + set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ + return -1; \ + } \ + return 0; \ +} + +DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") +DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") +DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") + static int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) { @@ -442,17 +582,6 @@ read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_ad return 0; } -static int -read_char(RemoteUnwinderObject *unwinder, uintptr_t address, char *result) -{ - int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(char), result); - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read char from remote memory"); - return -1; - } - return 0; -} - /* ============================================================================ * PYTHON OBJECT READING FUNCTIONS * ============================================================================ */ @@ -799,39 +928,9 @@ parse_task_name( static int parse_task_awaited_by( RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *awaited_by, - int recurse_task + PyObject *awaited_by ) { - // Read the entire TaskObj at once - char task_obj[SIZEOF_TASK_OBJ]; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, - unwinder->async_debug_offsets.asyncio_task_object.size, - task_obj) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object in awaited_by parsing"); - return -1; - } - - uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); - - if ((void*)task_ab_addr == NULL) { - return 0; - } - - char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); - - if (awaited_by_is_a_set) { - if (parse_tasks_in_set(unwinder, task_ab_addr, awaited_by, recurse_task)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse tasks in awaited_by set"); - return -1; - } - } else { - if (parse_task(unwinder, task_ab_addr, awaited_by, recurse_task)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse single awaited_by task"); - return -1; - } - } - - return 0; + return process_task_awaited_by(unwinder, task_address, process_task_parser, awaited_by); } static int @@ -940,12 +1039,13 @@ parse_coro_chain( // Parse the previous frame using the gi_iframe from local copy uintptr_t prev_frame; uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe; - if (parse_frame_object(unwinder, &name, gi_iframe_addr, &prev_frame) < 0) { + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &address_of_code_object, &prev_frame) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); return -1; } - if (name == NULL) { + if (!name) { return 0; } @@ -966,8 +1066,7 @@ parse_coro_chain( static PyObject* create_task_result( RemoteUnwinderObject *unwinder, - uintptr_t task_address, - int recurse_task + uintptr_t task_address ) { PyObject* result = NULL; PyObject *call_stack = NULL; @@ -983,11 +1082,7 @@ create_task_result( } // Create task name/address for second tuple element - if (recurse_task) { - tn = parse_task_name(unwinder, task_address); - } else { - tn = PyLong_FromUnsignedLongLong(task_address); - } + tn = PyLong_FromUnsignedLongLong(task_address); if (tn == NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); goto error; @@ -1040,8 +1135,7 @@ static int parse_task( RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *render_to, - int recurse_task + PyObject *render_to ) { char is_task; PyObject* result = NULL; @@ -1057,7 +1151,7 @@ parse_task( } if (is_task) { - result = create_task_result(unwinder, task_address, recurse_task); + result = create_task_result(unwinder, task_address); if (!result) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); goto error; @@ -1084,11 +1178,11 @@ parse_task( PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference PyStructSequence_SetItem(result, 1, task_name); // This steals the reference } - if (PyList_Append(render_to, result)) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); goto error; } + Py_DECREF(result); return 0; @@ -1098,234 +1192,335 @@ parse_task( } static int -process_set_entry( +process_single_task_node( RemoteUnwinderObject *unwinder, - uintptr_t table_ptr, - PyObject *awaited_by, - int recurse_task + uintptr_t task_addr, + PyObject **task_info, + PyObject *result ) { - uintptr_t key_addr; - if (read_py_ptr(unwinder, table_ptr, &key_addr)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); - return -1; + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + PyObject *coroutine_stack = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); + goto error; } - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry reference count"); - return -1; - } + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); + goto error; + } - if (ref_cnt) { - // if 'ref_cnt=0' it's a set dummy marker - if (parse_task(unwinder, key_addr, awaited_by, recurse_task)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task in set entry"); - return -1; - } - return 1; // Successfully processed a valid entry - } + // Extract the coroutine stack for this task + coroutine_stack = PyList_New(0); + if (coroutine_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); + goto error; } - return 0; // Entry was NULL or dummy marker -} -static int -parse_tasks_in_set( - RemoteUnwinderObject *unwinder, - uintptr_t set_addr, - PyObject *awaited_by, - int recurse_task -) { - char set_object[SIZEOF_SET_OBJ]; - int err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - set_addr, - SIZEOF_SET_OBJ, - set_object); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); - return -1; + if (parse_task(unwinder, task_addr, coroutine_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); + goto error; } - Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); - Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; // The set contains the `mask+1` element slots. - uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); + goto error; + } - Py_ssize_t i = 0; - Py_ssize_t els = 0; - while (i < set_len && els < num_els) { - int result = process_set_entry(unwinder, table_ptr, awaited_by, recurse_task); + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result_item = PyStructSequence_New(state->TaskInfo_Type); + if (result_item == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); + goto error; + } - if (result < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process set entry"); - return -1; - } - if (result > 0) { - els++; - } + PyStructSequence_SetItem(result_item, 0, task_id); // steals ref + PyStructSequence_SetItem(result_item, 1, tn); // steals ref + PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref + PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref - table_ptr += sizeof(void*) * 2; - i++; + // References transferred to tuple + task_id = NULL; + tn = NULL; + coroutine_stack = NULL; + current_awaited_by = NULL; + + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); + return -1; + } + if (task_info != NULL) { + *task_info = result_item; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyStructSequence_GetItem(result_item, 3); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); + // No cleanup needed here since all references were transferred to result_item + // and result_item was already added to result list and decreffed + return -1; } + return 0; + +error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + Py_XDECREF(coroutine_stack); + return -1; } +// Thread processor for get_all_awaited_by +static int +process_thread_for_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + uintptr_t head_addr = thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + return append_awaited_by(unwinder, tid, head_addr, result); +} +// Generic function to process task awaited_by static int -setup_async_result_structure(RemoteUnwinderObject *unwinder, PyObject **result, PyObject **calls) -{ - *result = PyList_New(1); - if (*result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create async result structure"); +process_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + set_entry_processor_func processor, + void *context +) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); return -1; } - *calls = PyList_New(0); - if (*calls == NULL) { - Py_DECREF(*result); - *result = NULL; - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create calls list in async result"); - return -1; + uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); + if ((void*)task_ab_addr == NULL) { + return 0; // No tasks waiting for this one } - if (PyList_SetItem(*result, 0, *calls)) { /* steals ref to 'calls' */ - Py_DECREF(*calls); - Py_DECREF(*result); - *result = NULL; - *calls = NULL; - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to set calls list in async result"); - return -1; - } + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); - return 0; + if (awaited_by_is_a_set) { + return iterate_set_entries(unwinder, task_ab_addr, processor, context); + } else { + // Single task waiting + return processor(unwinder, task_ab_addr, context); + } } static int -add_task_info_to_result( +process_running_task_chain( RemoteUnwinderObject *unwinder, - PyObject *result, - uintptr_t running_task_addr + uintptr_t running_task_addr, + uintptr_t thread_state_addr, + PyObject *result ) { - PyObject *tn = parse_task_name(unwinder, running_task_addr); - if (tn == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name for result"); + uintptr_t running_task_code_obj = 0; + if(get_task_code_object(unwinder, running_task_addr, &running_task_code_obj) < 0) { + return -1; + } + + // First, add this task to the result + PyObject *task_info = NULL; + if (process_single_task_node(unwinder, running_task_addr, &task_info, result) < 0) { return -1; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task name to result"); + // Get the chain from the current frame to this task + PyObject *coro_chain = PyStructSequence_GET_ITEM(task_info, 2); + assert(coro_chain != NULL); + if (PyList_GET_SIZE(coro_chain) != 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Coro chain is not a single item"); return -1; } - Py_DECREF(tn); + PyObject *coro_info = PyList_GET_ITEM(coro_chain, 0); + assert(coro_info != NULL); + PyObject *frame_chain = PyStructSequence_GET_ITEM(coro_info, 0); + assert(frame_chain != NULL); - PyObject* awaited_by = PyList_New(0); - if (awaited_by == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list for result"); + // Clear the coro_chain + if (PyList_Clear(frame_chain) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to clear coroutine chain"); return -1; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by to result"); + // Add the chain from the current frame to this task + if (parse_async_frame_chain(unwinder, frame_chain, thread_state_addr, running_task_code_obj) < 0) { return -1; } - Py_DECREF(awaited_by); - if (parse_task_awaited_by( - unwinder, running_task_addr, awaited_by, 1) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by for result"); + // Now find all tasks that are waiting for this task and process them + if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { return -1; } return 0; } +// Thread processor for get_async_stack_trace static int -process_single_task_node( +process_thread_for_async_stack_trace( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + + // Find running task in this thread + uintptr_t running_task_addr; + if (find_running_task_in_thread(unwinder, thread_state_addr, &running_task_addr) < 0) { + return 0; + } + + // If we found a running task, process it and its waiters + if ((void*)running_task_addr != NULL) { + PyObject *task_list = PyList_New(0); + if (task_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task list for thread"); + return -1; + } + + if (process_running_task_chain(unwinder, running_task_addr, thread_state_addr, task_list) < 0) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process running task chain"); + return -1; + } + + // Create AwaitedInfo structure for this thread + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + return -1; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *awaited_info = PyStructSequence_New(state->AwaitedInfo_Type); + if (awaited_info == NULL) { + Py_DECREF(tid_py); + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); + return -1; + } + + PyStructSequence_SetItem(awaited_info, 0, tid_py); // steals ref + PyStructSequence_SetItem(awaited_info, 1, task_list); // steals ref + + if (PyList_Append(result, awaited_info)) { + Py_DECREF(awaited_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append AwaitedInfo to result"); + return -1; + } + Py_DECREF(awaited_info); + } + + return 0; +} + +static int +process_task_and_waiters( RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result ) { - PyObject *tn = NULL; - PyObject *current_awaited_by = NULL; - PyObject *task_id = NULL; - PyObject *result_item = NULL; - PyObject *coroutine_stack = NULL; - - tn = parse_task_name(unwinder, task_addr); - if (tn == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); - goto error; + // First, add this task to the result + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + return -1; } - current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); - goto error; + // Now find all tasks that are waiting for this task and process them + return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); +} + +static int +find_running_task_in_thread( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + uintptr_t *running_task_addr +) { + *running_task_addr = (uintptr_t)NULL; + + uintptr_t address_of_running_loop; + int bytes_read = read_py_ptr( + unwinder, + thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, + &address_of_running_loop); + if (bytes_read == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); + return -1; } - // Extract the coroutine stack for this task - coroutine_stack = PyList_New(0); - if (coroutine_stack == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); - goto error; + // no asyncio loop is now running + if ((void*)address_of_running_loop == NULL) { + return 0; } - if (parse_task(unwinder, task_addr, coroutine_stack, 0) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); - goto error; + int err = read_ptr( + unwinder, + thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, + running_task_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); + return -1; } - task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); - goto error; + return 0; +} + +static int +get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr) { + uintptr_t running_coro_addr = 0; + + if(read_py_ptr( + unwinder, + task_addr + unwinder->async_debug_offsets.asyncio_task_object.task_coro, + &running_coro_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); + return -1; } - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - result_item = PyStructSequence_New(state->TaskInfo_Type); - if (result_item == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); - goto error; + if (running_coro_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); + return -1; } - PyStructSequence_SetItem(result_item, 0, task_id); // steals ref - PyStructSequence_SetItem(result_item, 1, tn); // steals ref - PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref - PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref - - // References transferred to tuple - task_id = NULL; - tn = NULL; - coroutine_stack = NULL; - current_awaited_by = NULL; - - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); + // note: genobject's gi_iframe is an embedded struct so the address to + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + unwinder, + running_coro_addr + unwinder->debug_offsets.gen_object.gi_iframe, code_obj_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); return -1; } - Py_DECREF(result_item); - // Get back current_awaited_by reference for parse_task_awaited_by - current_awaited_by = PyStructSequence_GetItem(result_item, 3); - if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); - // No cleanup needed here since all references were transferred to result_item - // and result_item was already added to result list and decreffed + if (*code_obj_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); return -1; } return 0; - -error: - Py_XDECREF(tn); - Py_XDECREF(current_awaited_by); - Py_XDECREF(task_id); - Py_XDECREF(result_item); - Py_XDECREF(coroutine_stack); - return -1; } /* ============================================================================ @@ -1908,45 +2103,13 @@ populate_initial_state_data( return 0; } + static int find_running_frame( RemoteUnwinderObject *unwinder, - uintptr_t runtime_start_address, + uintptr_t address_of_thread, uintptr_t *frame ) { - uint64_t interpreter_state_list_head = - unwinder->debug_offsets.runtime_state.interpreters_head; - - uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state for running frame"); - return -1; - } - - if (address_of_interpreter_state == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL in running frame search"); - return -1; - } - - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address_of_interpreter_state + - unwinder->debug_offsets.interpreter_state.threads_main, - sizeof(void*), - &address_of_thread); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread address for running frame"); - return -1; - } - - // No Python frames are available for us (can happen at tear-down). if ((void*)address_of_thread != NULL) { int err = read_ptr( unwinder, @@ -1963,133 +2126,6 @@ find_running_frame( return 0; } -static int -find_running_task( - RemoteUnwinderObject *unwinder, - uintptr_t *running_task_addr -) { - *running_task_addr = (uintptr_t)NULL; - - uint64_t interpreter_state_list_head = - unwinder->debug_offsets.runtime_state.interpreters_head; - - uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - unwinder->runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state for running task"); - return -1; - } - - if (address_of_interpreter_state == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL in running task search"); - return -1; - } - - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address_of_interpreter_state + - unwinder->debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &address_of_thread); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread head for running task"); - return -1; - } - - uintptr_t address_of_running_loop; - // No Python frames are available for us (can happen at tear-down). - if ((void*)address_of_thread == NULL) { - return 0; - } - - bytes_read = read_py_ptr( - unwinder, - address_of_thread - + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, - &address_of_running_loop); - if (bytes_read == -1) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); - return -1; - } - - // no asyncio loop is now running - if ((void*)address_of_running_loop == NULL) { - return 0; - } - - int err = read_ptr( - unwinder, - address_of_thread - + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, - running_task_addr); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); - return -1; - } - - return 0; -} - -static int -find_running_task_and_coro( - RemoteUnwinderObject *unwinder, - uintptr_t *running_task_addr, - uintptr_t *running_coro_addr, - uintptr_t *running_task_code_obj -) { - *running_task_addr = (uintptr_t)NULL; - if (find_running_task( - unwinder, running_task_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task search failed"); - return -1; - } - - if ((void*)*running_task_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No running task found"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task address is NULL"); - return -1; - } - - if (read_py_ptr( - unwinder, - *running_task_addr + unwinder->async_debug_offsets.asyncio_task_object.task_coro, - running_coro_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); - return -1; - } - - if ((void*)*running_coro_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); - return -1; - } - - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - if (read_py_ptr( - unwinder, - *running_coro_addr + unwinder->debug_offsets.gen_object.gi_iframe, - running_task_code_obj) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); - return -1; - } - - if ((void*)*running_task_code_obj == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); - return -1; - } - - return 0; -} - - /* ============================================================================ * FRAME PARSING FUNCTIONS * ============================================================================ */ @@ -2126,9 +2162,11 @@ parse_frame_object( RemoteUnwinderObject *unwinder, PyObject** result, uintptr_t address, + uintptr_t* address_of_code_object, uintptr_t* previous_frame ) { char frame[SIZEOF_INTERP_FRAME]; + *address_of_code_object = 0; Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, @@ -2158,77 +2196,38 @@ parse_frame_object( } #endif - return parse_code_object(unwinder, result, code_object,instruction_pointer, previous_frame, tlbc_index); -} - -static int -parse_async_frame_object( - RemoteUnwinderObject *unwinder, - PyObject** result, - uintptr_t address, - uintptr_t* previous_frame, - uintptr_t* code_object -) { - char frame[SIZEOF_INTERP_FRAME]; - - Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address, - SIZEOF_INTERP_FRAME, - frame - ); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read async frame"); - return -1; - } - - *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - *code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); - int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, *code_object); - if (frame_valid != 1) { - return frame_valid; - } - - uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - - // Get tlbc_index for free threading builds - int32_t tlbc_index = 0; -#ifdef Py_GIL_DISABLED - if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { - tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); - } -#endif - - if (parse_code_object( - unwinder, result, *code_object, instruction_pointer, previous_frame, tlbc_index)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse code object in async frame"); - return -1; - } - - return 1; + *address_of_code_object = code_object; + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); } static int -parse_async_frame_chain( + parse_async_frame_chain( RemoteUnwinderObject *unwinder, PyObject *calls, + uintptr_t address_of_thread, uintptr_t running_task_code_obj ) { uintptr_t address_of_current_frame; - if (find_running_frame(unwinder, unwinder->runtime_start_address, &address_of_current_frame) < 0) { + if (find_running_frame(unwinder, address_of_thread, &address_of_current_frame) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); return -1; } - uintptr_t address_of_code_object; + PyObject *frames = PyList_New(0); + if (frames == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frames list"); + return -1; + } + while ((void*)address_of_current_frame != NULL) { PyObject* frame_info = NULL; - int res = parse_async_frame_object( + uintptr_t address_of_code_object; + int res = parse_frame_object( unwinder, &frame_info, address_of_current_frame, - &address_of_current_frame, - &address_of_code_object + &address_of_code_object, + &address_of_current_frame ); if (res < 0) { @@ -2294,7 +2293,7 @@ append_awaited_by_for_thread( uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) - unwinder->async_debug_offsets.asyncio_task_object.task_node; - if (process_single_task_node(unwinder, task_addr, result) < 0) { + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } @@ -2389,7 +2388,8 @@ process_frame_chain( // Try chunks first, fallback to direct memory read if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { PyErr_Clear(); - if (parse_frame_object(unwinder, &frame, frame_addr, &next_frame_addr) < 0) { + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); return -1; } @@ -2830,53 +2830,12 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s goto result_err; } - uintptr_t thread_state_addr; - unsigned long tid = 0; - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &self->handle, - self->interpreter_addr - + self->debug_offsets.interpreter_state.threads_main, - sizeof(void*), - &thread_state_addr)) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to read main thread state in get_all_awaited_by"); + // Process all threads + if (iterate_threads(self, process_thread_for_awaited_by, result) < 0) { goto result_err; } - uintptr_t head_addr; - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &self->handle, - thread_state_addr - + self->debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to read thread ID in get_all_awaited_by"); - goto result_err; - } - - head_addr = thread_state_addr - + self->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; - - if (append_awaited_by(self, tid, head_addr, result)) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to append awaited_by for thread in get_all_awaited_by"); - goto result_err; - } - - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &self->handle, - thread_state_addr + self->debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to read next thread state in get_all_awaited_by"); - goto result_err; - } - } - - head_addr = self->interpreter_addr + uintptr_t head_addr = self->interpreter_addr + self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; // On top of a per-thread task lists used by default by asyncio to avoid @@ -2903,32 +2862,50 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s @critical_section _remote_debugging.RemoteUnwinder.get_async_stack_trace -Returns information about the currently running async task and its stack trace. +Get the currently running async tasks and their dependency graphs from the remote process. -Returns a tuple of (task_info, stack_frames) where: -- task_info is a tuple of (task_id, task_name) identifying the task -- stack_frames is a list of tuples (function_name, filename, line_number) representing - the Python stack frames for the task, ordered from most recent to oldest +This returns information about running tasks and all tasks that are waiting for them, +forming a complete dependency graph for each thread's active task. -Example: - ((4345585712, 'Task-1'), [ - ('run_echo_server', 'server.py', 127), - ('serve_forever', 'server.py', 45), - ('main', 'app.py', 23) - ]) +For each thread with a running task, returns the running task plus all tasks that +transitively depend on it (tasks waiting for the running task, tasks waiting for +those tasks, etc.). + +Returns a list of per-thread results, where each thread result contains: +- Thread ID +- List of task information for the running task and all its waiters + +Each task info contains: +- Task ID (memory address) +- Task name +- Call stack frames: List of (func_name, filename, lineno) +- List of tasks waiting for this task (recursive structure) Raises: RuntimeError: If AsyncioDebug section is not available in the target process - RuntimeError: If there is an error copying memory from the target process - OSError: If there is an error accessing the target process - PermissionError: If access to the target process is denied - UnicodeDecodeError: If there is an error decoding strings from the target process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output (similar structure to get_all_awaited_by but only for running tasks): +[ + # Thread 140234 results + (140234, [ + # Running task and its complete waiter dependency graph + (4345585712, 'main_task', + [("run_server", "server.py", 127), ("main", "app.py", 23)], + [ + # Tasks waiting for main_task + (4345585800, 'worker_1', [...], [...]), + (4345585900, 'worker_2', [...], [...]) + ]) + ]) +] [clinic start generated code]*/ static PyObject * _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) -/*[clinic end generated code: output=6433d52b55e87bbe input=11b7150c59d4c60f]*/ +/*[clinic end generated code: output=6433d52b55e87bbe input=8744b47c9ec2220a]*/ { if (!self->async_debug_offsets_available) { PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); @@ -2936,35 +2913,20 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject return NULL; } - PyObject *result = NULL; - PyObject *calls = NULL; - - if (setup_async_result_structure(self, &result, &calls) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to setup async result structure"); - goto cleanup; - } - - uintptr_t running_task_addr, running_coro_addr, running_task_code_obj; - if (find_running_task_and_coro(self, &running_task_addr, - &running_coro_addr, &running_task_code_obj) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to find running task and coro"); - goto cleanup; - } - - if (parse_async_frame_chain(self, calls, running_task_code_obj) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to parse async frame chain"); - goto cleanup; + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create result list in get_async_stack_trace"); + return NULL; } - if (add_task_info_to_result(self, result, running_task_addr) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to add task info to result"); - goto cleanup; + // Process all threads + if (iterate_threads(self, process_thread_for_async_stack_trace, result) < 0) { + goto result_err; } _Py_RemoteDebug_ClearCache(&self->handle); return result; - -cleanup: +result_err: _Py_RemoteDebug_ClearCache(&self->handle); Py_XDECREF(result); return NULL; diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h index e80b24b54c0ffa..f6a51cdba6b401 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -235,26 +235,41 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, "get_async_stack_trace($self, /)\n" "--\n" "\n" -"Returns information about the currently running async task and its stack trace.\n" +"Get the currently running async tasks and their dependency graphs from the remote process.\n" "\n" -"Returns a tuple of (task_info, stack_frames) where:\n" -"- task_info is a tuple of (task_id, task_name) identifying the task\n" -"- stack_frames is a list of tuples (function_name, filename, line_number) representing\n" -" the Python stack frames for the task, ordered from most recent to oldest\n" +"This returns information about running tasks and all tasks that are waiting for them,\n" +"forming a complete dependency graph for each thread\'s active task.\n" "\n" -"Example:\n" -" ((4345585712, \'Task-1\'), [\n" -" (\'run_echo_server\', \'server.py\', 127),\n" -" (\'serve_forever\', \'server.py\', 45),\n" -" (\'main\', \'app.py\', 23)\n" -" ])\n" +"For each thread with a running task, returns the running task plus all tasks that\n" +"transitively depend on it (tasks waiting for the running task, tasks waiting for\n" +"those tasks, etc.).\n" +"\n" +"Returns a list of per-thread results, where each thread result contains:\n" +"- Thread ID\n" +"- List of task information for the running task and all its waiters\n" +"\n" +"Each task info contains:\n" +"- Task ID (memory address)\n" +"- Task name\n" +"- Call stack frames: List of (func_name, filename, lineno)\n" +"- List of tasks waiting for this task (recursive structure)\n" "\n" "Raises:\n" " RuntimeError: If AsyncioDebug section is not available in the target process\n" -" RuntimeError: If there is an error copying memory from the target process\n" -" OSError: If there is an error accessing the target process\n" -" PermissionError: If access to the target process is denied\n" -" UnicodeDecodeError: If there is an error decoding strings from the target process"); +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output (similar structure to get_all_awaited_by but only for running tasks):\n" +"[\n" +" (140234, [\n" +" (4345585712, \'main_task\',\n" +" [(\"run_server\", \"server.py\", 127), (\"main\", \"app.py\", 23)],\n" +" [\n" +" (4345585800, \'worker_1\', [...], [...]),\n" +" (4345585900, \'worker_2\', [...], [...])\n" +" ])\n" +" ])\n" +"]"); #define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, @@ -273,4 +288,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=a37ab223d5081b16 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0dd1e6e8bab2a8b1 input=a9049054013a1b77]*/ From 61dd9fdad729fe02d91c03804659f7d0c5a89276 Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Wed, 9 Jul 2025 16:46:33 -0700 Subject: [PATCH 836/989] gh-135846: Add zstd dependency to Android build script (#136253) Adds zstd to the Android build process. --------- Co-authored-by: Malcolm Smith --- Android/android.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Android/android.py b/Android/android.py index 551168fc4b2f5a..a3a48c0c6b7027 100755 --- a/Android/android.py +++ b/Android/android.py @@ -175,7 +175,7 @@ def unpack_deps(host, prefix_dir): os.chdir(prefix_dir) deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.15-4", - "sqlite-3.49.1-0", "xz-5.4.6-1"]: + "sqlite-3.49.1-0", "xz-5.4.6-1", "zstd-1.5.7-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") shutil.unpack_archive(filename) From b44316a0976fb3fcd50bae9d67b0810ee0252d93 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 10 Jul 2025 10:12:23 +0200 Subject: [PATCH 837/989] gh-136476: Remove creation of unused list (GH-136494) --- Modules/_remote_debugging_module.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 178c6d11357e5e..d72031137e0a4e 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -2213,12 +2213,6 @@ static int return -1; } - PyObject *frames = PyList_New(0); - if (frames == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frames list"); - return -1; - } - while ((void*)address_of_current_frame != NULL) { PyObject* frame_info = NULL; uintptr_t address_of_code_object; From c17654334946b232aa296696cf70ec93a09d8156 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 10 Jul 2025 11:57:29 +0300 Subject: [PATCH 838/989] gh-136438: Make sure `test_builtins` pass with all optimization levels (#136474) --- Lib/test/test_builtin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 14fe3355239615..8830641f0abdc7 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -436,7 +436,7 @@ def f(): """doc""" # test both direct compilation and compilation via AST codeobjs = [] codeobjs.append(compile(codestr, "", "exec", optimize=optval)) - tree = ast.parse(codestr) + tree = ast.parse(codestr, optimize=optval) codeobjs.append(compile(tree, "", "exec", optimize=optval)) for code in codeobjs: ns = {} @@ -624,7 +624,7 @@ def test_compile_ast(self): for opt in [opt1, opt2]: opt_right = opt.value.right self.assertIsInstance(opt_right, ast.Constant) - self.assertEqual(opt_right.value, True) + self.assertEqual(opt_right.value, __debug__) def test_delattr(self): sys.spam = 1 From 85bc89f35f40c844df74d913fd32b2b1475fc942 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 10 Jul 2025 13:07:55 +0200 Subject: [PATCH 839/989] gh-136209: Add .. c:var:: declarations for C exception types (GH-136210) Co-authored-by: Victor Stinner --- Doc/c-api/exceptions.rst | 438 +++++++++++++++++---------------------- Doc/conf.py | 69 ------ 2 files changed, 187 insertions(+), 320 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index a750cda3e2d474..3ff4631a8e53c4 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -982,184 +982,135 @@ these are the C equivalent to :func:`reprlib.recursive_repr`. .. _standardexceptions: -Standard Exceptions -=================== - -All standard Python exceptions are available as global variables whose names are -``PyExc_`` followed by the Python exception name. These have the type -:c:expr:`PyObject*`; they are all class objects. For completeness, here are all -the variables: - -.. index:: - single: PyExc_BaseException (C var) - single: PyExc_BaseExceptionGroup (C var) - single: PyExc_Exception (C var) - single: PyExc_ArithmeticError (C var) - single: PyExc_AssertionError (C var) - single: PyExc_AttributeError (C var) - single: PyExc_BlockingIOError (C var) - single: PyExc_BrokenPipeError (C var) - single: PyExc_BufferError (C var) - single: PyExc_ChildProcessError (C var) - single: PyExc_ConnectionAbortedError (C var) - single: PyExc_ConnectionError (C var) - single: PyExc_ConnectionRefusedError (C var) - single: PyExc_ConnectionResetError (C var) - single: PyExc_EOFError (C var) - single: PyExc_FileExistsError (C var) - single: PyExc_FileNotFoundError (C var) - single: PyExc_FloatingPointError (C var) - single: PyExc_GeneratorExit (C var) - single: PyExc_ImportError (C var) - single: PyExc_IndentationError (C var) - single: PyExc_IndexError (C var) - single: PyExc_InterruptedError (C var) - single: PyExc_IsADirectoryError (C var) - single: PyExc_KeyError (C var) - single: PyExc_KeyboardInterrupt (C var) - single: PyExc_LookupError (C var) - single: PyExc_MemoryError (C var) - single: PyExc_ModuleNotFoundError (C var) - single: PyExc_NameError (C var) - single: PyExc_NotADirectoryError (C var) - single: PyExc_NotImplementedError (C var) - single: PyExc_OSError (C var) - single: PyExc_OverflowError (C var) - single: PyExc_PermissionError (C var) - single: PyExc_ProcessLookupError (C var) - single: PyExc_PythonFinalizationError (C var) - single: PyExc_RecursionError (C var) - single: PyExc_ReferenceError (C var) - single: PyExc_RuntimeError (C var) - single: PyExc_StopAsyncIteration (C var) - single: PyExc_StopIteration (C var) - single: PyExc_SyntaxError (C var) - single: PyExc_SystemError (C var) - single: PyExc_SystemExit (C var) - single: PyExc_TabError (C var) - single: PyExc_TimeoutError (C var) - single: PyExc_TypeError (C var) - single: PyExc_UnboundLocalError (C var) - single: PyExc_UnicodeDecodeError (C var) - single: PyExc_UnicodeEncodeError (C var) - single: PyExc_UnicodeError (C var) - single: PyExc_UnicodeTranslateError (C var) - single: PyExc_ValueError (C var) - single: PyExc_ZeroDivisionError (C var) - -+-----------------------------------------+---------------------------------+----------+ -| C Name | Python Name | Notes | -+=========================================+=================================+==========+ -| :c:data:`PyExc_BaseException` | :exc:`BaseException` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BaseExceptionGroup` | :exc:`BaseExceptionGroup` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_Exception` | :exc:`Exception` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_AssertionError` | :exc:`AssertionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_AttributeError` | :exc:`AttributeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BlockingIOError` | :exc:`BlockingIOError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BrokenPipeError` | :exc:`BrokenPipeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BufferError` | :exc:`BufferError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ChildProcessError` | :exc:`ChildProcessError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionAbortedError` | :exc:`ConnectionAbortedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionError` | :exc:`ConnectionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionRefusedError` | :exc:`ConnectionRefusedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionResetError` | :exc:`ConnectionResetError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_EOFError` | :exc:`EOFError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FileExistsError` | :exc:`FileExistsError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FileNotFoundError` | :exc:`FileNotFoundError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FloatingPointError` | :exc:`FloatingPointError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_GeneratorExit` | :exc:`GeneratorExit` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ImportError` | :exc:`ImportError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IndentationError` | :exc:`IndentationError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IndexError` | :exc:`IndexError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_InterruptedError` | :exc:`InterruptedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IsADirectoryError` | :exc:`IsADirectoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_KeyError` | :exc:`KeyError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_KeyboardInterrupt` | :exc:`KeyboardInterrupt` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_LookupError` | :exc:`LookupError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_MemoryError` | :exc:`MemoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ModuleNotFoundError` | :exc:`ModuleNotFoundError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NameError` | :exc:`NameError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NotADirectoryError` | :exc:`NotADirectoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NotImplementedError` | :exc:`NotImplementedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_OSError` | :exc:`OSError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_OverflowError` | :exc:`OverflowError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PermissionError` | :exc:`PermissionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ProcessLookupError` | :exc:`ProcessLookupError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PythonFinalizationError` | :exc:`PythonFinalizationError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RecursionError` | :exc:`RecursionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ReferenceError` | :exc:`ReferenceError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RuntimeError` | :exc:`RuntimeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_StopAsyncIteration` | :exc:`StopAsyncIteration` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_StopIteration` | :exc:`StopIteration` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SyntaxError` | :exc:`SyntaxError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SystemError` | :exc:`SystemError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SystemExit` | :exc:`SystemExit` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TabError` | :exc:`TabError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TimeoutError` | :exc:`TimeoutError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TypeError` | :exc:`TypeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnboundLocalError` | :exc:`UnboundLocalError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeDecodeError` | :exc:`UnicodeDecodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeEncodeError` | :exc:`UnicodeEncodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeError` | :exc:`UnicodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeTranslateError` | :exc:`UnicodeTranslateError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ValueError` | :exc:`ValueError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ZeroDivisionError` | :exc:`ZeroDivisionError` | | -+-----------------------------------------+---------------------------------+----------+ +Exception and warning types +=========================== + +All standard Python exceptions and warning categories are available as global +variables whose names are ``PyExc_`` followed by the Python exception name. +These have the type :c:expr:`PyObject*`; they are all class objects. + +For completeness, here are all the variables: + +Exception types +--------------- + +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * * .. c:var:: PyObject *PyExc_BaseException + * :exc:`BaseException` + * * .. c:var:: PyObject *PyExc_BaseExceptionGroup + * :exc:`BaseExceptionGroup` + * * .. c:var:: PyObject *PyExc_Exception + * :exc:`Exception` + * * .. c:var:: PyObject *PyExc_ArithmeticError + * :exc:`ArithmeticError` + * * .. c:var:: PyObject *PyExc_AssertionError + * :exc:`AssertionError` + * * .. c:var:: PyObject *PyExc_AttributeError + * :exc:`AttributeError` + * * .. c:var:: PyObject *PyExc_BlockingIOError + * :exc:`BlockingIOError` + * * .. c:var:: PyObject *PyExc_BrokenPipeError + * :exc:`BrokenPipeError` + * * .. c:var:: PyObject *PyExc_BufferError + * :exc:`BufferError` + * * .. c:var:: PyObject *PyExc_ChildProcessError + * :exc:`ChildProcessError` + * * .. c:var:: PyObject *PyExc_ConnectionAbortedError + * :exc:`ConnectionAbortedError` + * * .. c:var:: PyObject *PyExc_ConnectionError + * :exc:`ConnectionError` + * * .. c:var:: PyObject *PyExc_ConnectionRefusedError + * :exc:`ConnectionRefusedError` + * * .. c:var:: PyObject *PyExc_ConnectionResetError + * :exc:`ConnectionResetError` + * * .. c:var:: PyObject *PyExc_EOFError + * :exc:`EOFError` + * * .. c:var:: PyObject *PyExc_FileExistsError + * :exc:`FileExistsError` + * * .. c:var:: PyObject *PyExc_FileNotFoundError + * :exc:`FileNotFoundError` + * * .. c:var:: PyObject *PyExc_FloatingPointError + * :exc:`FloatingPointError` + * * .. c:var:: PyObject *PyExc_GeneratorExit + * :exc:`GeneratorExit` + * * .. c:var:: PyObject *PyExc_ImportError + * :exc:`ImportError` + * * .. c:var:: PyObject *PyExc_IndentationError + * :exc:`IndentationError` + * * .. c:var:: PyObject *PyExc_IndexError + * :exc:`IndexError` + * * .. c:var:: PyObject *PyExc_InterruptedError + * :exc:`InterruptedError` + * * .. c:var:: PyObject *PyExc_IsADirectoryError + * :exc:`IsADirectoryError` + * * .. c:var:: PyObject *PyExc_KeyError + * :exc:`KeyError` + * * .. c:var:: PyObject *PyExc_KeyboardInterrupt + * :exc:`KeyboardInterrupt` + * * .. c:var:: PyObject *PyExc_LookupError + * :exc:`LookupError` + * * .. c:var:: PyObject *PyExc_MemoryError + * :exc:`MemoryError` + * * .. c:var:: PyObject *PyExc_ModuleNotFoundError + * :exc:`ModuleNotFoundError` + * * .. c:var:: PyObject *PyExc_NameError + * :exc:`NameError` + * * .. c:var:: PyObject *PyExc_NotADirectoryError + * :exc:`NotADirectoryError` + * * .. c:var:: PyObject *PyExc_NotImplementedError + * :exc:`NotImplementedError` + * * .. c:var:: PyObject *PyExc_OSError + * :exc:`OSError` + * * .. c:var:: PyObject *PyExc_OverflowError + * :exc:`OverflowError` + * * .. c:var:: PyObject *PyExc_PermissionError + * :exc:`PermissionError` + * * .. c:var:: PyObject *PyExc_ProcessLookupError + * :exc:`ProcessLookupError` + * * .. c:var:: PyObject *PyExc_PythonFinalizationError + * :exc:`PythonFinalizationError` + * * .. c:var:: PyObject *PyExc_RecursionError + * :exc:`RecursionError` + * * .. c:var:: PyObject *PyExc_ReferenceError + * :exc:`ReferenceError` + * * .. c:var:: PyObject *PyExc_RuntimeError + * :exc:`RuntimeError` + * * .. c:var:: PyObject *PyExc_StopAsyncIteration + * :exc:`StopAsyncIteration` + * * .. c:var:: PyObject *PyExc_StopIteration + * :exc:`StopIteration` + * * .. c:var:: PyObject *PyExc_SyntaxError + * :exc:`SyntaxError` + * * .. c:var:: PyObject *PyExc_SystemError + * :exc:`SystemError` + * * .. c:var:: PyObject *PyExc_SystemExit + * :exc:`SystemExit` + * * .. c:var:: PyObject *PyExc_TabError + * :exc:`TabError` + * * .. c:var:: PyObject *PyExc_TimeoutError + * :exc:`TimeoutError` + * * .. c:var:: PyObject *PyExc_TypeError + * :exc:`TypeError` + * * .. c:var:: PyObject *PyExc_UnboundLocalError + * :exc:`UnboundLocalError` + * * .. c:var:: PyObject *PyExc_UnicodeDecodeError + * :exc:`UnicodeDecodeError` + * * .. c:var:: PyObject *PyExc_UnicodeEncodeError + * :exc:`UnicodeEncodeError` + * * .. c:var:: PyObject *PyExc_UnicodeError + * :exc:`UnicodeError` + * * .. c:var:: PyObject *PyExc_UnicodeTranslateError + * :exc:`UnicodeTranslateError` + * * .. c:var:: PyObject *PyExc_ValueError + * :exc:`ValueError` + * * .. c:var:: PyObject *PyExc_ZeroDivisionError + * :exc:`ZeroDivisionError` .. versionadded:: 3.3 :c:data:`PyExc_BlockingIOError`, :c:data:`PyExc_BrokenPipeError`, @@ -1180,94 +1131,79 @@ the variables: .. versionadded:: 3.11 :c:data:`PyExc_BaseExceptionGroup`. -These are compatibility aliases to :c:data:`PyExc_OSError`: -.. index:: - single: PyExc_EnvironmentError (C var) - single: PyExc_IOError (C var) - single: PyExc_WindowsError (C var) +OSError aliases +--------------- -+-------------------------------------+----------+ -| C Name | Notes | -+=====================================+==========+ -| :c:data:`!PyExc_EnvironmentError` | | -+-------------------------------------+----------+ -| :c:data:`!PyExc_IOError` | | -+-------------------------------------+----------+ -| :c:data:`!PyExc_WindowsError` | [2]_ | -+-------------------------------------+----------+ +The following are a compatibility aliases to :c:data:`PyExc_OSError`. .. versionchanged:: 3.3 These aliases used to be separate exception types. +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * Notes + * * .. c:var:: PyObject *PyExc_EnvironmentError + * :exc:`OSError` + * + * * .. c:var:: PyObject *PyExc_IOError + * :exc:`OSError` + * + * * .. c:var:: PyObject *PyExc_WindowsError + * :exc:`OSError` + * [win]_ + Notes: -.. [1] - This is a base class for other standard exceptions. +.. [win] + :c:var:`!PyExc_WindowsError` is only defined on Windows; protect code that + uses this by testing that the preprocessor macro ``MS_WINDOWS`` is defined. -.. [2] - Only defined on Windows; protect code that uses this by testing that the - preprocessor macro ``MS_WINDOWS`` is defined. .. _standardwarningcategories: -Standard Warning Categories -=========================== - -All standard Python warning categories are available as global variables whose -names are ``PyExc_`` followed by the Python exception name. These have the type -:c:expr:`PyObject*`; they are all class objects. For completeness, here are all -the variables: - -.. index:: - single: PyExc_Warning (C var) - single: PyExc_BytesWarning (C var) - single: PyExc_DeprecationWarning (C var) - single: PyExc_EncodingWarning (C var) - single: PyExc_FutureWarning (C var) - single: PyExc_ImportWarning (C var) - single: PyExc_PendingDeprecationWarning (C var) - single: PyExc_ResourceWarning (C var) - single: PyExc_RuntimeWarning (C var) - single: PyExc_SyntaxWarning (C var) - single: PyExc_UnicodeWarning (C var) - single: PyExc_UserWarning (C var) - -+------------------------------------------+---------------------------------+----------+ -| C Name | Python Name | Notes | -+==========================================+=================================+==========+ -| :c:data:`PyExc_Warning` | :exc:`Warning` | [3]_ | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BytesWarning` | :exc:`BytesWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_DeprecationWarning` | :exc:`DeprecationWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_EncodingWarning` | :exc:`EncodingWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FutureWarning` | :exc:`FutureWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ImportWarning` | :exc:`ImportWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PendingDeprecationWarning`| :exc:`PendingDeprecationWarning`| | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ResourceWarning` | :exc:`ResourceWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RuntimeWarning` | :exc:`RuntimeWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SyntaxWarning` | :exc:`SyntaxWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeWarning` | :exc:`UnicodeWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UserWarning` | :exc:`UserWarning` | | -+------------------------------------------+---------------------------------+----------+ +Warning types +------------- + +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * * .. c:var:: PyObject *PyExc_Warning + * :exc:`Warning` + * * .. c:var:: PyObject *PyExc_BytesWarning + * :exc:`BytesWarning` + * * .. c:var:: PyObject *PyExc_DeprecationWarning + * :exc:`DeprecationWarning` + * * .. c:var:: PyObject *PyExc_EncodingWarning + * :exc:`EncodingWarning` + * * .. c:var:: PyObject *PyExc_FutureWarning + * :exc:`FutureWarning` + * * .. c:var:: PyObject *PyExc_ImportWarning + * :exc:`ImportWarning` + * * .. c:var:: PyObject *PyExc_PendingDeprecationWarning + * :exc:`PendingDeprecationWarning` + * * .. c:var:: PyObject *PyExc_ResourceWarning + * :exc:`ResourceWarning` + * * .. c:var:: PyObject *PyExc_RuntimeWarning + * :exc:`RuntimeWarning` + * * .. c:var:: PyObject *PyExc_SyntaxWarning + * :exc:`SyntaxWarning` + * * .. c:var:: PyObject *PyExc_UnicodeWarning + * :exc:`UnicodeWarning` + * * .. c:var:: PyObject *PyExc_UserWarning + * :exc:`UserWarning` .. versionadded:: 3.2 :c:data:`PyExc_ResourceWarning`. .. versionadded:: 3.10 :c:data:`PyExc_EncodingWarning`. - -Notes: - -.. [3] - This is a base class for other standard warning categories. diff --git a/Doc/conf.py b/Doc/conf.py index 8b2a8f20fcc558..c1ed94d7b46ec2 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -233,75 +233,6 @@ # Temporary undocumented names. # In future this list must be empty. nitpick_ignore += [ - # C API: Standard Python exception classes - ('c:data', 'PyExc_ArithmeticError'), - ('c:data', 'PyExc_AssertionError'), - ('c:data', 'PyExc_AttributeError'), - ('c:data', 'PyExc_BaseException'), - ('c:data', 'PyExc_BaseExceptionGroup'), - ('c:data', 'PyExc_BlockingIOError'), - ('c:data', 'PyExc_BrokenPipeError'), - ('c:data', 'PyExc_BufferError'), - ('c:data', 'PyExc_ChildProcessError'), - ('c:data', 'PyExc_ConnectionAbortedError'), - ('c:data', 'PyExc_ConnectionError'), - ('c:data', 'PyExc_ConnectionRefusedError'), - ('c:data', 'PyExc_ConnectionResetError'), - ('c:data', 'PyExc_EOFError'), - ('c:data', 'PyExc_Exception'), - ('c:data', 'PyExc_FileExistsError'), - ('c:data', 'PyExc_FileNotFoundError'), - ('c:data', 'PyExc_FloatingPointError'), - ('c:data', 'PyExc_GeneratorExit'), - ('c:data', 'PyExc_ImportError'), - ('c:data', 'PyExc_IndentationError'), - ('c:data', 'PyExc_IndexError'), - ('c:data', 'PyExc_InterruptedError'), - ('c:data', 'PyExc_IsADirectoryError'), - ('c:data', 'PyExc_KeyboardInterrupt'), - ('c:data', 'PyExc_KeyError'), - ('c:data', 'PyExc_LookupError'), - ('c:data', 'PyExc_MemoryError'), - ('c:data', 'PyExc_ModuleNotFoundError'), - ('c:data', 'PyExc_NameError'), - ('c:data', 'PyExc_NotADirectoryError'), - ('c:data', 'PyExc_NotImplementedError'), - ('c:data', 'PyExc_OSError'), - ('c:data', 'PyExc_OverflowError'), - ('c:data', 'PyExc_PermissionError'), - ('c:data', 'PyExc_ProcessLookupError'), - ('c:data', 'PyExc_PythonFinalizationError'), - ('c:data', 'PyExc_RecursionError'), - ('c:data', 'PyExc_ReferenceError'), - ('c:data', 'PyExc_RuntimeError'), - ('c:data', 'PyExc_StopAsyncIteration'), - ('c:data', 'PyExc_StopIteration'), - ('c:data', 'PyExc_SyntaxError'), - ('c:data', 'PyExc_SystemError'), - ('c:data', 'PyExc_SystemExit'), - ('c:data', 'PyExc_TabError'), - ('c:data', 'PyExc_TimeoutError'), - ('c:data', 'PyExc_TypeError'), - ('c:data', 'PyExc_UnboundLocalError'), - ('c:data', 'PyExc_UnicodeDecodeError'), - ('c:data', 'PyExc_UnicodeEncodeError'), - ('c:data', 'PyExc_UnicodeError'), - ('c:data', 'PyExc_UnicodeTranslateError'), - ('c:data', 'PyExc_ValueError'), - ('c:data', 'PyExc_ZeroDivisionError'), - # C API: Standard Python warning classes - ('c:data', 'PyExc_BytesWarning'), - ('c:data', 'PyExc_DeprecationWarning'), - ('c:data', 'PyExc_EncodingWarning'), - ('c:data', 'PyExc_FutureWarning'), - ('c:data', 'PyExc_ImportWarning'), - ('c:data', 'PyExc_PendingDeprecationWarning'), - ('c:data', 'PyExc_ResourceWarning'), - ('c:data', 'PyExc_RuntimeWarning'), - ('c:data', 'PyExc_SyntaxWarning'), - ('c:data', 'PyExc_UnicodeWarning'), - ('c:data', 'PyExc_UserWarning'), - ('c:data', 'PyExc_Warning'), # Undocumented public C macros ('c:macro', 'Py_BUILD_ASSERT'), ('c:macro', 'Py_BUILD_ASSERT_EXPR'), From d754f75f42f040267d818ab804ada340f55e5925 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 10 Jul 2025 13:16:01 +0200 Subject: [PATCH 840/989] gh-82088: Improve performance of PyLong_As*() for multi-digit ints (#135585) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Victor Stinner --- ...5-06-17-08-37-45.gh-issue-82088.TgPvLg.rst | 5 + Objects/longobject.c | 110 ++++++++++++------ 2 files changed, 80 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-08-37-45.gh-issue-82088.TgPvLg.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-08-37-45.gh-issue-82088.TgPvLg.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-08-37-45.gh-issue-82088.TgPvLg.rst new file mode 100644 index 00000000000000..74bd182c1990f8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-08-37-45.gh-issue-82088.TgPvLg.rst @@ -0,0 +1,5 @@ +Improve performance of ``PyLongObject`` conversion functions +``PyLong_AsLongAndOverflow()``, ``PyLong_AsSsize_t()``, ``PyLong_AsUnsignedLong()``, ``PyLong_AsSize_t()``, +``PyLong_AsUnsignedLongMask()``, ``PyLong_AsUnsignedLongLongMask()``, ``PyLong_AsLongLongAndOverflow()`` +for integers larger than 2**30 up to 30%. + diff --git a/Objects/longobject.c b/Objects/longobject.c index 557bb6e1dd956c..581db10b54ab57 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -526,6 +526,54 @@ PyLong_FromDouble(double dval) #define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) #define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) +static inline unsigned long +unroll_digits_ulong(PyLongObject *v, Py_ssize_t *iptr) +{ + assert(ULONG_MAX >= ((1UL << PyLong_SHIFT) - 1)); + + Py_ssize_t i = *iptr; + assert(i >= 2); + + /* unroll 1 digit */ + --i; + digit *digits = v->long_value.ob_digit; + unsigned long x = digits[i]; + +#if (ULONG_MAX >> PyLong_SHIFT) >= ((1UL << PyLong_SHIFT) - 1) + /* unroll another digit */ + x <<= PyLong_SHIFT; + --i; + x |= digits[i]; +#endif + + *iptr = i; + return x; +} + +static inline size_t +unroll_digits_size_t(PyLongObject *v, Py_ssize_t *iptr) +{ + assert(SIZE_MAX >= ((1UL << PyLong_SHIFT) - 1)); + + Py_ssize_t i = *iptr; + assert(i >= 2); + + /* unroll 1 digit */ + --i; + digit *digits = v->long_value.ob_digit; + size_t x = digits[i]; + +#if (SIZE_MAX >> PyLong_SHIFT) >= ((1 << PyLong_SHIFT) - 1) + /* unroll another digit */ + x <<= PyLong_SHIFT; + --i; + x |= digits[i]; +#endif + + *iptr = i; + return x; +} + /* Get a C long int from an int object or any object that has an __index__ method. @@ -535,13 +583,11 @@ PyLong_FromDouble(double dval) For other errors (e.g., TypeError), return -1 and set an error condition. In this case *overflow will be 0. */ - long PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) { - /* This version by Tim Peters */ + /* This version originally by Tim Peters */ PyLongObject *v; - unsigned long x, prev; long res; Py_ssize_t i; int sign; @@ -584,14 +630,14 @@ PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) res = -1; i = _PyLong_DigitCount(v); sign = _PyLong_NonCompactSign(v); - x = 0; + + unsigned long x = unroll_digits_ulong(v, &i); while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { + if (x > (ULONG_MAX >> PyLong_SHIFT)) { *overflow = sign; goto exit; } + x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } /* Haven't lost any bits, but casting to long requires extra * care (see comment above). @@ -655,7 +701,6 @@ PyLong_AsInt(PyObject *obj) Py_ssize_t PyLong_AsSsize_t(PyObject *vv) { PyLongObject *v; - size_t x, prev; Py_ssize_t i; int sign; @@ -674,12 +719,13 @@ PyLong_AsSsize_t(PyObject *vv) { } i = _PyLong_DigitCount(v); sign = _PyLong_NonCompactSign(v); - x = 0; + + size_t x = unroll_digits_size_t(v, &i); while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) + if (x > (SIZE_MAX >> PyLong_SHIFT)) { goto overflow; + } + x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } /* Haven't lost any bits, but casting to a signed type requires * extra care (see comment above). @@ -705,7 +751,6 @@ unsigned long PyLong_AsUnsignedLong(PyObject *vv) { PyLongObject *v; - unsigned long x, prev; Py_ssize_t i; if (vv == NULL) { @@ -736,13 +781,13 @@ PyLong_AsUnsignedLong(PyObject *vv) return (unsigned long) -1; } i = _PyLong_DigitCount(v); - x = 0; + + unsigned long x = unroll_digits_ulong(v, &i); while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { + if (x > (ULONG_MAX >> PyLong_SHIFT)) { goto overflow; } + x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } return x; overflow: @@ -759,7 +804,6 @@ size_t PyLong_AsSize_t(PyObject *vv) { PyLongObject *v; - size_t x, prev; Py_ssize_t i; if (vv == NULL) { @@ -781,16 +825,16 @@ PyLong_AsSize_t(PyObject *vv) return (size_t) -1; } i = _PyLong_DigitCount(v); - x = 0; + + size_t x = unroll_digits_size_t(v, &i); while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C size_t"); - return (size_t) -1; + if (x > (SIZE_MAX >> PyLong_SHIFT)) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C size_t"); + return (size_t) -1; + } + x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } - } return x; } @@ -801,7 +845,6 @@ static unsigned long _PyLong_AsUnsignedLongMask(PyObject *vv) { PyLongObject *v; - unsigned long x; Py_ssize_t i; if (vv == NULL || !PyLong_Check(vv)) { @@ -818,7 +861,7 @@ _PyLong_AsUnsignedLongMask(PyObject *vv) } i = _PyLong_DigitCount(v); int sign = _PyLong_NonCompactSign(v); - x = 0; + unsigned long x = unroll_digits_ulong(v, &i); while (--i >= 0) { x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } @@ -1619,7 +1662,6 @@ static unsigned long long _PyLong_AsUnsignedLongLongMask(PyObject *vv) { PyLongObject *v; - unsigned long long x; Py_ssize_t i; int sign; @@ -1637,7 +1679,7 @@ _PyLong_AsUnsignedLongLongMask(PyObject *vv) } i = _PyLong_DigitCount(v); sign = _PyLong_NonCompactSign(v); - x = 0; + unsigned long long x = unroll_digits_ulong(v, &i); while (--i >= 0) { x = (x << PyLong_SHIFT) | v->long_value.ob_digit[i]; } @@ -1683,7 +1725,6 @@ PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) { /* This version by Tim Peters */ PyLongObject *v; - unsigned long long x, prev; long long res; Py_ssize_t i; int sign; @@ -1725,15 +1766,14 @@ PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) else { i = _PyLong_DigitCount(v); sign = _PyLong_NonCompactSign(v); - x = 0; + unsigned long long x = unroll_digits_ulong(v, &i); while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) + v->long_value.ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { + if (x > ULLONG_MAX >> PyLong_SHIFT) { *overflow = sign; res = -1; goto exit; } + x = (x << PyLong_SHIFT) + v->long_value.ob_digit[i]; } /* Haven't lost any bits, but casting to long requires extra * care (see comment above). From f519918ec6c125715d4efc9713ba80e83346e466 Mon Sep 17 00:00:00 2001 From: Rogdham <3994389+Rogdham@users.noreply.github.com> Date: Thu, 10 Jul 2025 14:47:27 +0200 Subject: [PATCH 841/989] gh-136394: Fix race condition in test_zstd (GH-136432) --- Lib/test/test_zstd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index 90b2adc9665480..6358cc78739cd9 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -2673,8 +2673,12 @@ def test_compress_locking(self): input = b'a'* (16*_1K) num_threads = 8 + # gh-136394: the first output of .compress() includes the frame header + # we run the first .compress() call outside of the threaded portion + # to make the test order-independent + comp = ZstdCompressor() - parts = [] + parts = [comp.compress(input, ZstdCompressor.FLUSH_BLOCK)] for _ in range(num_threads): res = comp.compress(input, ZstdCompressor.FLUSH_BLOCK) if res: @@ -2683,7 +2687,7 @@ def test_compress_locking(self): expected = b''.join(parts) + rest1 comp = ZstdCompressor() - output = [] + output = [comp.compress(input, ZstdCompressor.FLUSH_BLOCK)] def run_method(method, input_data, output_data): res = method(input_data, ZstdCompressor.FLUSH_BLOCK) if res: From f1b8d01c802165b9807172965b7097b6aaf6fcd3 Mon Sep 17 00:00:00 2001 From: Dave Peck Date: Thu, 10 Jul 2025 07:27:41 -0700 Subject: [PATCH 842/989] gh-132661: Add default value (of `""`) for `Interpolation.expression` (#136441) --- Lib/test/test_string/_support.py | 44 ++++++++++++------- Lib/test/test_string/test_templatelib.py | 13 ++++++ ...-07-08-23-53-51.gh-issue-132661.B84iYt.rst | 1 + Objects/clinic/interpolationobject.c.h | 23 ++++++---- Objects/interpolationobject.c | 4 +- 5 files changed, 58 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-08-23-53-51.gh-issue-132661.B84iYt.rst diff --git a/Lib/test/test_string/_support.py b/Lib/test/test_string/_support.py index abdddaf187b4fe..e1d7f6f6500aeb 100644 --- a/Lib/test/test_string/_support.py +++ b/Lib/test/test_string/_support.py @@ -2,33 +2,45 @@ class TStringBaseCase: + def assertInterpolationEqual(self, i, exp): + """Test Interpolation equality. + + The *i* argument must be an Interpolation instance. + + The *exp* argument must be a tuple of the form + (value, expression, conversion, format_spec) where the final three + items may be omitted and are assumed to be '', None and '' respectively. + """ + if len(exp) == 4: + actual = (i.value, i.expression, i.conversion, i.format_spec) + self.assertEqual(actual, exp) + elif len(exp) == 3: + self.assertEqual((i.value, i.expression, i.conversion), exp) + self.assertEqual(i.format_spec, "") + elif len(exp) == 2: + self.assertEqual((i.value, i.expression), exp) + self.assertEqual(i.conversion, None) + self.assertEqual(i.format_spec, "") + elif len(exp) == 1: + self.assertEqual((i.value,), exp) + self.assertEqual(i.expression, "") + self.assertEqual(i.conversion, None) + self.assertEqual(i.format_spec, "") + def assertTStringEqual(self, t, strings, interpolations): """Test template string literal equality. The *strings* argument must be a tuple of strings equal to *t.strings*. The *interpolations* argument must be a sequence of tuples which are - compared against *t.interpolations*. Each tuple consists of - (value, expression, conversion, format_spec), though the final two - items may be omitted, and are assumed to be None and '' respectively. + compared against *t.interpolations*. Each tuple must match the form + described in the `assertInterpolationEqual` method. """ self.assertEqual(t.strings, strings) self.assertEqual(len(t.interpolations), len(interpolations)) for i, exp in zip(t.interpolations, interpolations, strict=True): - if len(exp) == 4: - actual = (i.value, i.expression, i.conversion, i.format_spec) - self.assertEqual(actual, exp) - continue - - if len(exp) == 3: - self.assertEqual((i.value, i.expression, i.conversion), exp) - self.assertEqual(i.format_spec, '') - continue - - self.assertEqual((i.value, i.expression), exp) - self.assertEqual(i.format_spec, '') - self.assertIsNone(i.conversion) + self.assertInterpolationEqual(i, exp) def convert(value, conversion): diff --git a/Lib/test/test_string/test_templatelib.py b/Lib/test/test_string/test_templatelib.py index 85fcff486d6616..adaf590e64dad6 100644 --- a/Lib/test/test_string/test_templatelib.py +++ b/Lib/test/test_string/test_templatelib.py @@ -45,6 +45,19 @@ def test_basic_creation(self): self.assertEqual(len(t.interpolations), 0) self.assertEqual(fstring(t), 'Hello,\nworld') + def test_interpolation_creation(self): + i = Interpolation('Maria', 'name', 'a', 'fmt') + self.assertInterpolationEqual(i, ('Maria', 'name', 'a', 'fmt')) + + i = Interpolation('Maria', 'name', 'a') + self.assertInterpolationEqual(i, ('Maria', 'name', 'a')) + + i = Interpolation('Maria', 'name') + self.assertInterpolationEqual(i, ('Maria', 'name')) + + i = Interpolation('Maria') + self.assertInterpolationEqual(i, ('Maria',)) + def test_creation_interleaving(self): # Should add strings on either side t = Template(Interpolation('Maria', 'name', None, '')) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-08-23-53-51.gh-issue-132661.B84iYt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-08-23-53-51.gh-issue-132661.B84iYt.rst new file mode 100644 index 00000000000000..9930413b53c150 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-08-23-53-51.gh-issue-132661.B84iYt.rst @@ -0,0 +1 @@ +``Interpolation.expression`` now has a default, the empty string. diff --git a/Objects/clinic/interpolationobject.c.h b/Objects/clinic/interpolationobject.c.h index 7a94dabafc92f2..2030e17e49e47a 100644 --- a/Objects/clinic/interpolationobject.c.h +++ b/Objects/clinic/interpolationobject.c.h @@ -47,26 +47,31 @@ interpolation_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *argsbuf[4]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); - Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2; + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; PyObject *value; - PyObject *expression; + PyObject *expression = &_Py_STR(empty); PyObject *conversion = Py_None; PyObject *format_spec = &_Py_STR(empty); fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 2, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } value = fastargs[0]; - if (!PyUnicode_Check(fastargs[1])) { - _PyArg_BadArgument("Interpolation", "argument 'expression'", "str", fastargs[1]); - goto exit; - } - expression = fastargs[1]; if (!noptargs) { goto skip_optional_pos; } + if (fastargs[1]) { + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("Interpolation", "argument 'expression'", "str", fastargs[1]); + goto exit; + } + expression = fastargs[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } if (fastargs[2]) { if (!_conversion_converter(fastargs[2], &conversion)) { goto exit; @@ -86,4 +91,4 @@ interpolation_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=599742a5ccd6f060 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2391391e2d7708c0 input=a9049054013a1b77]*/ diff --git a/Objects/interpolationobject.c b/Objects/interpolationobject.c index a5d407a7b0e296..b58adb693f0cae 100644 --- a/Objects/interpolationobject.c +++ b/Objects/interpolationobject.c @@ -54,7 +54,7 @@ typedef struct { Interpolation.__new__ as interpolation_new value: object - expression: object(subclass_of='&PyUnicode_Type') + expression: object(subclass_of='&PyUnicode_Type', c_default='&_Py_STR(empty)') = "" conversion: object(converter='_conversion_converter') = None format_spec: object(subclass_of='&PyUnicode_Type', c_default='&_Py_STR(empty)') = "" [clinic start generated code]*/ @@ -63,7 +63,7 @@ static PyObject * interpolation_new_impl(PyTypeObject *type, PyObject *value, PyObject *expression, PyObject *conversion, PyObject *format_spec) -/*[clinic end generated code: output=6488e288765bc1a9 input=d91711024068528c]*/ +/*[clinic end generated code: output=6488e288765bc1a9 input=fc5c285c1dd23d36]*/ { interpolationobject *self = PyObject_GC_New(interpolationobject, type); if (!self) { From 4b41b2043b110a38616ff86ddb3f065ae7f15c3e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 10 Jul 2025 17:31:08 +0200 Subject: [PATCH 843/989] gh-82663: Clarify `codecs.iterdecode/encode` docs (#136497) Closes #82663 --- Doc/library/codecs.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 0e84f18dd4d5d5..37bd913b765d21 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -243,8 +243,8 @@ wider range of codecs when working with binary files: .. function:: iterencode(iterator, encoding, errors='strict', **kwargs) Uses an incremental encoder to iteratively encode the input provided by - *iterator*. This function is a :term:`generator`. - The *errors* argument (as well as any + *iterator*. *iterator* must yield :class:`str` objects. + This function is a :term:`generator`. The *errors* argument (as well as any other keyword argument) is passed through to the incremental encoder. This function requires that the codec accept text :class:`str` objects @@ -255,8 +255,8 @@ wider range of codecs when working with binary files: .. function:: iterdecode(iterator, encoding, errors='strict', **kwargs) Uses an incremental decoder to iteratively decode the input provided by - *iterator*. This function is a :term:`generator`. - The *errors* argument (as well as any + *iterator*. *iterator* must yield :class:`bytes` objects. + This function is a :term:`generator`. The *errors* argument (as well as any other keyword argument) is passed through to the incremental decoder. This function requires that the codec accept :class:`bytes` objects From 35e2c359703e076256c1249b74b87043972e04d6 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 10 Jul 2025 17:42:14 +0200 Subject: [PATCH 844/989] gh-52876: Implement missing parameter in `codecs.StreamReaderWriter` functions (#136498) Closes #52876 --- Lib/codecs.py | 10 +++++----- .../2025-07-10-10-18-19.gh-issue-52876.9Vjrd8.rst | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-10-10-18-19.gh-issue-52876.9Vjrd8.rst diff --git a/Lib/codecs.py b/Lib/codecs.py index fc38e922257644..e4a8010aba90a5 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -618,7 +618,7 @@ def readlines(self, sizehint=None, keepends=True): method and are included in the list entries. sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. + way of finding the true end-of-line. """ data = self.read() @@ -709,13 +709,13 @@ def read(self, size=-1): return self.reader.read(size) - def readline(self, size=None): + def readline(self, size=None, keepends=True): - return self.reader.readline(size) + return self.reader.readline(size, keepends) - def readlines(self, sizehint=None): + def readlines(self, sizehint=None, keepends=True): - return self.reader.readlines(sizehint) + return self.reader.readlines(sizehint, keepends) def __next__(self): diff --git a/Misc/NEWS.d/next/Library/2025-07-10-10-18-19.gh-issue-52876.9Vjrd8.rst b/Misc/NEWS.d/next/Library/2025-07-10-10-18-19.gh-issue-52876.9Vjrd8.rst new file mode 100644 index 00000000000000..a835306868d3dc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-10-10-18-19.gh-issue-52876.9Vjrd8.rst @@ -0,0 +1,3 @@ +Add missing ``keepends`` (default ``True``) parameter to +:meth:`!codecs.StreamReaderWriter.readline` and +:meth:`!codecs.StreamReaderWriter.readlines`. From 59acdba820f75081cfb47ad6e71044d022854cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Kiss=20Koll=C3=A1r?= Date: Thu, 10 Jul 2025 18:44:24 +0100 Subject: [PATCH 845/989] gh-135953: Implement sampling tool under profile.sample (#135998) Implement a statistical sampling profiler that can profile external Python processes by PID. Uses the _remote_debugging module and converts the results to pstats-compatible format for analysis. Co-authored-by: Pablo Galindo --- Doc/library/profile.rst | 306 ++- Doc/whatsnew/3.15.rst | 97 + Lib/profile/__init__.py | 6 + Lib/profile/__main__.py | 69 + Lib/profile/collector.py | 11 + Lib/{ => profile}/profile.py | 63 - Lib/profile/pstats_collector.py | 81 + Lib/profile/sample.py | 730 +++++++ Lib/profile/stack_collector.py | 37 + Lib/pstats.py | 68 +- Lib/test/test_external_inspection.py | 16 + Lib/test/test_sample_profiler.py | 1877 +++++++++++++++++ Makefile.pre.in | 1 + ...-07-06-18-38-10.gh-issue-135953.Z29DCz.rst | 9 + Python/remote_debug.h | 22 +- 15 files changed, 3319 insertions(+), 74 deletions(-) create mode 100644 Lib/profile/__init__.py create mode 100644 Lib/profile/__main__.py create mode 100644 Lib/profile/collector.py rename Lib/{ => profile}/profile.py (90%) create mode 100644 Lib/profile/pstats_collector.py create mode 100644 Lib/profile/sample.py create mode 100644 Lib/profile/stack_collector.py create mode 100644 Lib/test/test_sample_profiler.py create mode 100644 Misc/NEWS.d/next/Library/2025-07-06-18-38-10.gh-issue-135953.Z29DCz.rst diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index b6e51dffc40157..c5b88c22325404 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -4,7 +4,7 @@ The Python Profilers ******************** -**Source code:** :source:`Lib/profile.py` and :source:`Lib/pstats.py` +**Source code:** :source:`Lib/profile.py`, :source:`Lib/pstats.py`, and :source:`Lib/profile/sample.py` -------------- @@ -14,23 +14,32 @@ Introduction to the profilers ============================= .. index:: + single: statistical profiling + single: profiling, statistical single: deterministic profiling single: profiling, deterministic -:mod:`cProfile` and :mod:`profile` provide :dfn:`deterministic profiling` of +Python provides both :dfn:`statistical profiling` and :dfn:`deterministic profiling` of Python programs. A :dfn:`profile` is a set of statistics that describes how often and for how long various parts of the program executed. These statistics can be formatted into reports via the :mod:`pstats` module. -The Python standard library provides two different implementations of the same -profiling interface: +The Python standard library provides three different profiling implementations: -1. :mod:`cProfile` is recommended for most users; it's a C extension with +**Statistical Profiler:** + +1. :mod:`profile.sample` provides statistical profiling of running Python processes + using periodic stack sampling. It can attach to any running Python process without + requiring code modification or restart, making it ideal for production debugging. + +**Deterministic Profilers:** + +2. :mod:`cProfile` is recommended for development and testing; it's a C extension with reasonable overhead that makes it suitable for profiling long-running programs. Based on :mod:`lsprof`, contributed by Brett Rosen and Ted Czotter. -2. :mod:`profile`, a pure Python module whose interface is imitated by +3. :mod:`profile`, a pure Python module whose interface is imitated by :mod:`cProfile`, but which adds significant overhead to profiled programs. If you're trying to extend the profiler in some way, the task might be easier with this module. Originally designed and written by Jim Roskind. @@ -44,6 +53,77 @@ profiling interface: but not for C-level functions, and so the C code would seem faster than any Python one. +**Profiler Comparison:** + ++-------------------+----------------------+----------------------+----------------------+ +| Feature | Statistical | Deterministic | Deterministic | +| | (``profile.sample``) | (``cProfile``) | (``profile``) | ++===================+======================+======================+======================+ +| **Target** | Running process | Code you run | Code you run | ++-------------------+----------------------+----------------------+----------------------+ +| **Overhead** | Virtually none | Moderate | High | ++-------------------+----------------------+----------------------+----------------------+ +| **Accuracy** | Statistical approx. | Exact call counts | Exact call counts | ++-------------------+----------------------+----------------------+----------------------+ +| **Setup** | Attach to any PID | Instrument code | Instrument code | ++-------------------+----------------------+----------------------+----------------------+ +| **Use Case** | Production debugging | Development/testing | Profiler extension | ++-------------------+----------------------+----------------------+----------------------+ +| **Implementation**| C extension | C extension | Pure Python | ++-------------------+----------------------+----------------------+----------------------+ + +.. note:: + + The statistical profiler (:mod:`profile.sample`) is recommended for most production + use cases due to its extremely low overhead and ability to profile running processes + without modification. It can attach to any Python process and collect performance + data with minimal impact on execution speed, making it ideal for debugging + performance issues in live applications. + + +.. _statistical-profiling: + +What Is Statistical Profiling? +============================== + +:dfn:`Statistical profiling` works by periodically interrupting a running +program to capture its current call stack. Rather than monitoring every +function entry and exit like deterministic profilers, it takes snapshots at +regular intervals to build a statistical picture of where the program spends +its time. + +The sampling profiler uses process memory reading (via system calls like +``process_vm_readv`` on Linux, ``vm_read`` on macOS, and ``ReadProcessMemory`` on +Windows) to attach to a running Python process and extract stack trace +information without requiring any code modification or restart of the target +process. This approach provides several key advantages over traditional +profiling methods. + +The fundamental principle is that if a function appears frequently in the +collected stack samples, it is likely consuming significant CPU time. By +analyzing thousands of samples, the profiler can accurately estimate the +relative time spent in different parts of the program. The statistical nature +means that while individual measurements may vary, the aggregate results +converge to represent the true performance characteristics of the application. + +Since statistical profiling operates externally to the target process, it +introduces virtually no overhead to the running program. The profiler process +runs separately and reads the target process memory without interrupting its +execution. This makes it suitable for profiling production systems where +performance impact must be minimized. + +The accuracy of statistical profiling improves with the number of samples +collected. Short-lived functions may be missed or underrepresented, while +long-running functions will be captured proportionally to their execution time. +This characteristic makes statistical profiling particularly effective for +identifying the most significant performance bottlenecks rather than providing +exhaustive coverage of all function calls. + +Statistical profiling excels at answering questions like "which functions +consume the most CPU time?" and "where should I focus optimization efforts?" +rather than "exactly how many times was this function called?" The trade-off +between precision and practicality makes it an invaluable tool for performance +analysis in real-world applications. .. _profile-instant: @@ -54,6 +134,18 @@ This section is provided for users that "don't want to read the manual." It provides a very brief overview, and allows a user to rapidly perform profiling on an existing application. +**Statistical Profiling (Recommended for Production):** + +To profile an existing running process:: + + python -m profile.sample 1234 + +To profile with custom settings:: + + python -m profile.sample -i 50 -d 30 1234 + +**Deterministic Profiling (Development/Testing):** + To profile a function that takes a single argument, you can do:: import cProfile @@ -121,8 +213,208 @@ results to a file by specifying a filename to the :func:`run` function:: The :class:`pstats.Stats` class reads profile results from a file and formats them in various ways. +.. _sampling-profiler-cli: + +Statistical Profiler Command Line Interface +=========================================== + +.. program:: profile.sample + +The :mod:`profile.sample` module can be invoked as a script to profile running processes:: + + python -m profile.sample [options] PID + +**Basic Usage Examples:** + +Profile process 1234 for 10 seconds with default settings:: + + python -m profile.sample 1234 + +Profile with custom interval and duration, save to file:: + + python -m profile.sample -i 50 -d 30 -o profile.stats 1234 + +Generate collapsed stacks to use with tools like `flamegraph.pl +`_:: + + python -m profile.sample --collapsed 1234 + +Profile all threads, sort by total time:: + + python -m profile.sample -a --sort-tottime 1234 + +Profile with real-time sampling statistics:: + + python -m profile.sample --realtime-stats 1234 + +**Command Line Options:** + +.. option:: PID + + Process ID of the Python process to profile (required) + +.. option:: -i, --interval INTERVAL + + Sampling interval in microseconds (default: 100) + +.. option:: -d, --duration DURATION + + Sampling duration in seconds (default: 10) + +.. option:: -a, --all-threads + + Sample all threads in the process instead of just the main thread + +.. option:: --realtime-stats + + Print real-time sampling statistics during profiling + +.. option:: --pstats + + Generate pstats output (default) + +.. option:: --collapsed + + Generate collapsed stack traces for flamegraphs + +.. option:: -o, --outfile OUTFILE + + Save output to a file + +**Sorting Options (pstats format only):** + +.. option:: --sort-nsamples + + Sort by number of direct samples + +.. option:: --sort-tottime + + Sort by total time + +.. option:: --sort-cumtime + + Sort by cumulative time (default) + +.. option:: --sort-sample-pct + + Sort by sample percentage + +.. option:: --sort-cumul-pct + + Sort by cumulative sample percentage + +.. option:: --sort-nsamples-cumul + + Sort by cumulative samples + +.. option:: --sort-name + + Sort by function name + +.. option:: -l, --limit LIMIT + + Limit the number of rows in the output (default: 15) + +.. option:: --no-summary + + Disable the summary section in the output + +**Understanding Statistical Profile Output:** + +The statistical profiler produces output similar to deterministic profilers but with different column meanings:: + + Profile Stats: + nsamples sample% tottime (ms) cumul% cumtime (ms) filename:lineno(function) + 45/67 12.5 23.450 18.6 56.780 mymodule.py:42(process_data) + 23/23 6.4 15.230 6.4 15.230 :0(len) + +**Column Meanings:** + +- **nsamples**: ``direct/cumulative`` - Times function was directly executing / on call stack +- **sample%**: Percentage of total samples where function was directly executing +- **tottime**: Estimated time spent directly in this function +- **cumul%**: Percentage of samples where function was anywhere on call stack +- **cumtime**: Estimated cumulative time including called functions +- **filename:lineno(function)**: Location and name of the function + .. _profile-cli: +:mod:`profile.sample` Module Reference +======================================================= + +.. module:: profile.sample + :synopsis: Python statistical profiler. + +This section documents the programmatic interface for the :mod:`profile.sample` module. +For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information +about statistical profiling, see :ref:`statistical-profiling` + +.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False) + + Sample a Python process and generate profiling data. + + This is the main entry point for statistical profiling. It creates a + :class:`SampleProfiler`, collects stack traces from the target process, and + outputs the results in the specified format. + + :param int pid: Process ID of the target Python process + :param int sort: Sort order for pstats output (default: 2 for cumulative time) + :param int sample_interval_usec: Sampling interval in microseconds (default: 100) + :param int duration_sec: Duration to sample in seconds (default: 10) + :param str filename: Output filename (None for stdout/default naming) + :param bool all_threads: Whether to sample all threads (default: False) + :param int limit: Maximum number of functions to display (default: None) + :param bool show_summary: Whether to show summary statistics (default: True) + :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') + :param bool realtime_stats: Whether to display real-time statistics (default: False) + + :raises ValueError: If output_format is not 'pstats' or 'collapsed' + + Examples:: + + # Basic usage - profile process 1234 for 10 seconds + import profile.sample + profile.sample.sample(1234) + + # Profile with custom settings + profile.sample.sample(1234, duration_sec=30, sample_interval_usec=50, all_threads=True) + + # Generate collapsed stack traces for flamegraph.pl + profile.sample.sample(1234, output_format='collapsed', filename='profile.collapsed') + +.. class:: SampleProfiler(pid, sample_interval_usec, all_threads) + + Low-level API for the statistical profiler. + + This profiler uses periodic stack sampling to collect performance data + from running Python processes with minimal overhead. It can attach to + any Python process by PID and collect stack traces at regular intervals. + + :param int pid: Process ID of the target Python process + :param int sample_interval_usec: Sampling interval in microseconds + :param bool all_threads: Whether to sample all threads or just the main thread + + .. method:: sample(collector, duration_sec=10) + + Sample the target process for the specified duration. + + Collects stack traces from the target process at regular intervals + and passes them to the provided collector for processing. + + :param collector: Object that implements ``collect()`` method to process stack traces + :param int duration_sec: Duration to sample in seconds (default: 10) + + The method tracks sampling statistics and can display real-time + information if realtime_stats is enabled. + +.. seealso:: + + :ref:`sampling-profiler-cli` + Command-line interface documentation for the statistical profiler. + +Deterministic Profiler Command Line Interface +============================================= + .. program:: cProfile The files :mod:`cProfile` and :mod:`profile` can also be invoked as a script to @@ -564,7 +856,7 @@ What Is Deterministic Profiling? call*, *function return*, and *exception* events are monitored, and precise timings are made for the intervals between these events (during which time the user's code is executing). In contrast, :dfn:`statistical profiling` (which is -not done by this module) randomly samples the effective instruction pointer, and +provided by the :mod:`profile.sample` module) periodically samples the effective instruction pointer, and deduces where time is being spent. The latter technique traditionally involves less overhead (as the code does not need to be instrumented), but provides only relative indications of where time is being spent. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 706a816f888b30..e9b88458acdd79 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -70,6 +70,103 @@ Summary --- release highlights New features ============ +.. _whatsnew315-sampling-profiler: + +High frequency statistical sampling profiler +-------------------------------------------- + +A new statistical sampling profiler has been added to the :mod:`profile` module as +:mod:`profile.sample`. This profiler enables low-overhead performance analysis of +running Python processes without requiring code modification or process restart. + +Unlike deterministic profilers (:mod:`cProfile` and :mod:`profile`) that instrument +every function call, the sampling profiler periodically captures stack traces from +running processes. This approach provides virtually zero overhead while achieving +sampling rates of **up to 200,000 Hz**, making it the fastest sampling profiler +available for Python (at the time of its contribution) and ideal for debugging +performance issues in production environments. + +Key features include: + +* **Zero-overhead profiling**: Attach to any running Python process without + affecting its performance +* **No code modification required**: Profile existing applications without restart +* **Real-time statistics**: Monitor sampling quality during data collection +* **Multiple output formats**: Generate both detailed statistics and flamegraph data +* **Thread-aware profiling**: Option to profile all threads or just the main thread + +Profile process 1234 for 10 seconds with default settings:: + + python -m profile.sample 1234 + +Profile with custom interval and duration, save to file:: + + python -m profile.sample -i 50 -d 30 -o profile.stats 1234 + +Generate collapsed stacks for flamegraph:: + + python -m profile.sample --collapsed 1234 + +Profile all threads and sort by total time:: + + python -m profile.sample -a --sort-tottime 1234 + +The profiler generates statistical estimates of where time is spent:: + + Real-time sampling stats: Mean: 100261.5Hz (9.97µs) Min: 86333.4Hz (11.58µs) Max: 118807.2Hz (8.42µs) Samples: 400001 + Captured 498841 samples in 5.00 seconds + Sample rate: 99768.04 samples/sec + Error rate: 0.72% + Profile Stats: + nsamples sample% tottime (s) cumul% cumtime (s) filename:lineno(function) + 43/418858 0.0 0.000 87.9 4.189 case.py:667(TestCase.run) + 3293/418812 0.7 0.033 87.9 4.188 case.py:613(TestCase._callTestMethod) + 158562/158562 33.3 1.586 33.3 1.586 test_compile.py:725(TestSpecifics.test_compiler_recursion_limit..check_limit) + 129553/129553 27.2 1.296 27.2 1.296 ast.py:46(parse) + 0/128129 0.0 0.000 26.9 1.281 test_ast.py:884(AST_Tests.test_ast_recursion_limit..check_limit) + 7/67446 0.0 0.000 14.2 0.674 test_compile.py:729(TestSpecifics.test_compiler_recursion_limit) + 6/60380 0.0 0.000 12.7 0.604 test_ast.py:888(AST_Tests.test_ast_recursion_limit) + 3/50020 0.0 0.000 10.5 0.500 test_compile.py:727(TestSpecifics.test_compiler_recursion_limit) + 1/38011 0.0 0.000 8.0 0.380 test_ast.py:886(AST_Tests.test_ast_recursion_limit) + 1/25076 0.0 0.000 5.3 0.251 test_compile.py:728(TestSpecifics.test_compiler_recursion_limit) + 22361/22362 4.7 0.224 4.7 0.224 test_compile.py:1368(TestSpecifics.test_big_dict_literal) + 4/18008 0.0 0.000 3.8 0.180 test_ast.py:889(AST_Tests.test_ast_recursion_limit) + 11/17696 0.0 0.000 3.7 0.177 subprocess.py:1038(Popen.__init__) + 16968/16968 3.6 0.170 3.6 0.170 subprocess.py:1900(Popen._execute_child) + 2/16941 0.0 0.000 3.6 0.169 test_compile.py:730(TestSpecifics.test_compiler_recursion_limit) + + Legend: + nsamples: Direct/Cumulative samples (direct executing / on call stack) + sample%: Percentage of total samples this function was directly executing + tottime: Estimated total time spent directly in this function + cumul%: Percentage of total samples when this function was on the call stack + cumtime: Estimated cumulative time (including time in called functions) + filename:lineno(function): Function location and name + + Summary of Interesting Functions: + + Functions with Highest Direct/Cumulative Ratio (Hot Spots): + 1.000 direct/cumulative ratio, 33.3% direct samples: test_compile.py:(TestSpecifics.test_compiler_recursion_limit..check_limit) + 1.000 direct/cumulative ratio, 27.2% direct samples: ast.py:(parse) + 1.000 direct/cumulative ratio, 3.6% direct samples: subprocess.py:(Popen._execute_child) + + Functions with Highest Call Frequency (Indirect Calls): + 418815 indirect calls, 87.9% total stack presence: case.py:(TestCase.run) + 415519 indirect calls, 87.9% total stack presence: case.py:(TestCase._callTestMethod) + 159470 indirect calls, 33.5% total stack presence: test_compile.py:(TestSpecifics.test_compiler_recursion_limit) + + Functions with Highest Call Magnification (Cumulative/Direct): + 12267.9x call magnification, 159470 indirect calls from 13 direct: test_compile.py:(TestSpecifics.test_compiler_recursion_limit) + 10581.7x call magnification, 116388 indirect calls from 11 direct: test_ast.py:(AST_Tests.test_ast_recursion_limit) + 9740.9x call magnification, 418815 indirect calls from 43 direct: case.py:(TestCase.run) + +The profiler automatically identifies performance bottlenecks through statistical +analysis, highlighting functions with high CPU usage and call frequency patterns. + +This capability is particularly valuable for debugging performance issues in +production systems where traditional profiling approaches would be too intrusive. + +(Contributed by Pablo Galindo and László Kiss Kollár in :gh:`135953`.) Other language changes diff --git a/Lib/profile/__init__.py b/Lib/profile/__init__.py new file mode 100644 index 00000000000000..21c886448ac2af --- /dev/null +++ b/Lib/profile/__init__.py @@ -0,0 +1,6 @@ +from .profile import run +from .profile import runctx +from .profile import Profile +from .profile import _Utils + +__all__ = ['run', 'runctx', 'Profile'] diff --git a/Lib/profile/__main__.py b/Lib/profile/__main__.py new file mode 100644 index 00000000000000..d900e547330e76 --- /dev/null +++ b/Lib/profile/__main__.py @@ -0,0 +1,69 @@ +import io +import importlib.machinery +import os +import sys +from optparse import OptionParser + +from .profile import runctx + + +def main(): + + usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..." + parser = OptionParser(usage=usage) + parser.allow_interspersed_args = False + parser.add_option('-o', '--outfile', dest="outfile", + help="Save stats to ", default=None) + parser.add_option('-m', dest="module", action="store_true", + help="Profile a library module.", default=False) + parser.add_option('-s', '--sort', dest="sort", + help="Sort order when printing to stdout, based on pstats.Stats class", + default=-1) + + if not sys.argv[1:]: + parser.print_usage() + sys.exit(2) + + (options, args) = parser.parse_args() + sys.argv[:] = args + + # The script that we're profiling may chdir, so capture the absolute path + # to the output file at startup. + if options.outfile is not None: + options.outfile = os.path.abspath(options.outfile) + + if len(args) > 0: + if options.module: + import runpy + code = "run_module(modname, run_name='__main__')" + globs = { + 'run_module': runpy.run_module, + 'modname': args[0] + } + else: + progname = args[0] + sys.path.insert(0, os.path.dirname(progname)) + with io.open_code(progname) as fp: + code = compile(fp.read(), progname, 'exec') + spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, + origin=progname) + globs = { + '__spec__': spec, + '__file__': spec.origin, + '__name__': spec.name, + '__package__': None, + '__cached__': None, + } + try: + runctx(code, globs, None, options.outfile, options.sort) + except BrokenPipeError as exc: + # Prevent "Exception ignored" during interpreter shutdown. + sys.stdout = None + sys.exit(exc.errno) + else: + parser.print_usage() + return parser + +# When invoked as main program, invoke the profiler on a script +if __name__ == '__main__': + main() diff --git a/Lib/profile/collector.py b/Lib/profile/collector.py new file mode 100644 index 00000000000000..28286120aefc67 --- /dev/null +++ b/Lib/profile/collector.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod + + +class Collector(ABC): + @abstractmethod + def collect(self, stack_frames): + """Collect profiling data from stack frames.""" + + @abstractmethod + def export(self, filename): + """Export collected data to a file.""" diff --git a/Lib/profile.py b/Lib/profile/profile.py similarity index 90% rename from Lib/profile.py rename to Lib/profile/profile.py index a5afb12c9d121a..1d9e2fd41f85b4 100644 --- a/Lib/profile.py +++ b/Lib/profile/profile.py @@ -550,66 +550,3 @@ def f(m, f1=f1): return mean #**************************************************************************** - -def main(): - import os - from optparse import OptionParser - - usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..." - parser = OptionParser(usage=usage) - parser.allow_interspersed_args = False - parser.add_option('-o', '--outfile', dest="outfile", - help="Save stats to ", default=None) - parser.add_option('-m', dest="module", action="store_true", - help="Profile a library module.", default=False) - parser.add_option('-s', '--sort', dest="sort", - help="Sort order when printing to stdout, based on pstats.Stats class", - default=-1) - - if not sys.argv[1:]: - parser.print_usage() - sys.exit(2) - - (options, args) = parser.parse_args() - sys.argv[:] = args - - # The script that we're profiling may chdir, so capture the absolute path - # to the output file at startup. - if options.outfile is not None: - options.outfile = os.path.abspath(options.outfile) - - if len(args) > 0: - if options.module: - import runpy - code = "run_module(modname, run_name='__main__')" - globs = { - 'run_module': runpy.run_module, - 'modname': args[0] - } - else: - progname = args[0] - sys.path.insert(0, os.path.dirname(progname)) - with io.open_code(progname) as fp: - code = compile(fp.read(), progname, 'exec') - spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, - origin=progname) - globs = { - '__spec__': spec, - '__file__': spec.origin, - '__name__': spec.name, - '__package__': None, - '__cached__': None, - } - try: - runctx(code, globs, None, options.outfile, options.sort) - except BrokenPipeError as exc: - # Prevent "Exception ignored" during interpreter shutdown. - sys.stdout = None - sys.exit(exc.errno) - else: - parser.print_usage() - return parser - -# When invoked as main program, invoke the profiler on a script -if __name__ == '__main__': - main() diff --git a/Lib/profile/pstats_collector.py b/Lib/profile/pstats_collector.py new file mode 100644 index 00000000000000..f58da1159c21a7 --- /dev/null +++ b/Lib/profile/pstats_collector.py @@ -0,0 +1,81 @@ +import collections +import marshal + +from .collector import Collector + + +class PstatsCollector(Collector): + def __init__(self, sample_interval_usec): + self.result = collections.defaultdict( + lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) + ) + self.stats = {} + self.sample_interval_usec = sample_interval_usec + self.callers = collections.defaultdict( + lambda: collections.defaultdict(int) + ) + + def collect(self, stack_frames): + for thread_id, frames in stack_frames: + if not frames: + continue + + # Process each frame in the stack to track cumulative calls + for frame in frames: + location = (frame.filename, frame.lineno, frame.funcname) + self.result[location]["cumulative_calls"] += 1 + + # The top frame gets counted as an inline call (directly executing) + top_frame = frames[0] + top_location = ( + top_frame.filename, + top_frame.lineno, + top_frame.funcname, + ) + + self.result[top_location]["direct_calls"] += 1 + + # Track caller-callee relationships for call graph + for i in range(1, len(frames)): + callee_frame = frames[i - 1] + caller_frame = frames[i] + + callee = ( + callee_frame.filename, + callee_frame.lineno, + callee_frame.funcname, + ) + caller = ( + caller_frame.filename, + caller_frame.lineno, + caller_frame.funcname, + ) + + self.callers[callee][caller] += 1 + + def export(self, filename): + self.create_stats() + self._dump_stats(filename) + + def _dump_stats(self, file): + stats_with_marker = dict(self.stats) + stats_with_marker[("__sampled__",)] = True + with open(file, "wb") as f: + marshal.dump(stats_with_marker, f) + + # Needed for compatibility with pstats.Stats + def create_stats(self): + sample_interval_sec = self.sample_interval_usec / 1_000_000 + callers = {} + for fname, call_counts in self.result.items(): + total = call_counts["direct_calls"] * sample_interval_sec + cumulative_calls = call_counts["cumulative_calls"] + cumulative = cumulative_calls * sample_interval_sec + callers = dict(self.callers.get(fname, {})) + self.stats[fname] = ( + call_counts["direct_calls"], # cc = direct calls for sample percentage + cumulative_calls, # nc = cumulative calls for cumulative percentage + total, + cumulative, + callers, + ) diff --git a/Lib/profile/sample.py b/Lib/profile/sample.py new file mode 100644 index 00000000000000..97d23611e67ad7 --- /dev/null +++ b/Lib/profile/sample.py @@ -0,0 +1,730 @@ +import argparse +import _remote_debugging +import os +import pstats +import statistics +import sys +import sysconfig +import time +from collections import deque +from _colorize import ANSIColors + +from .pstats_collector import PstatsCollector +from .stack_collector import CollapsedStackCollector + +FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None + +class SampleProfiler: + def __init__(self, pid, sample_interval_usec, all_threads): + self.pid = pid + self.sample_interval_usec = sample_interval_usec + self.all_threads = all_threads + if FREE_THREADED_BUILD: + self.unwinder = _remote_debugging.RemoteUnwinder( + self.pid, all_threads=self.all_threads + ) + else: + only_active_threads = bool(self.all_threads) + self.unwinder = _remote_debugging.RemoteUnwinder( + self.pid, only_active_thread=only_active_threads + ) + # Track sample intervals and total sample count + self.sample_intervals = deque(maxlen=100) + self.total_samples = 0 + self.realtime_stats = False + + def sample(self, collector, duration_sec=10): + sample_interval_sec = self.sample_interval_usec / 1_000_000 + running_time = 0 + num_samples = 0 + errors = 0 + start_time = next_time = time.perf_counter() + last_sample_time = start_time + realtime_update_interval = 1.0 # Update every second + last_realtime_update = start_time + + while running_time < duration_sec: + current_time = time.perf_counter() + if next_time < current_time: + try: + stack_frames = self.unwinder.get_stack_trace() + collector.collect(stack_frames) + except ProcessLookupError: + break + except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): + errors += 1 + except Exception as e: + if not self._is_process_running(): + break + raise e from None + + # Track actual sampling intervals for real-time stats + if num_samples > 0: + actual_interval = current_time - last_sample_time + self.sample_intervals.append( + 1.0 / actual_interval + ) # Convert to Hz + self.total_samples += 1 + + # Print real-time statistics if enabled + if ( + self.realtime_stats + and (current_time - last_realtime_update) + >= realtime_update_interval + ): + self._print_realtime_stats() + last_realtime_update = current_time + + last_sample_time = current_time + num_samples += 1 + next_time += sample_interval_sec + + running_time = time.perf_counter() - start_time + + # Clear real-time stats line if it was being displayed + if self.realtime_stats and len(self.sample_intervals) > 0: + print() # Add newline after real-time stats + + print(f"Captured {num_samples} samples in {running_time:.2f} seconds") + print(f"Sample rate: {num_samples / running_time:.2f} samples/sec") + print(f"Error rate: {(errors / num_samples) * 100:.2f}%") + + expected_samples = int(duration_sec / sample_interval_sec) + if num_samples < expected_samples: + print( + f"Warning: missed {expected_samples - num_samples} samples " + f"from the expected total of {expected_samples} " + f"({(expected_samples - num_samples) / expected_samples * 100:.2f}%)" + ) + + def _is_process_running(self): + if sys.platform == "linux" or sys.platform == "darwin": + try: + os.kill(self.pid, 0) + return True + except ProcessLookupError: + return False + elif sys.platform == "win32": + try: + _remote_debugging.RemoteUnwinder(self.pid) + except Exception: + return False + return True + else: + raise ValueError(f"Unsupported platform: {sys.platform}") + + def _print_realtime_stats(self): + """Print real-time sampling statistics.""" + if len(self.sample_intervals) < 2: + return + + # Calculate statistics on the Hz values (deque automatically maintains rolling window) + hz_values = list(self.sample_intervals) + mean_hz = statistics.mean(hz_values) + min_hz = min(hz_values) + max_hz = max(hz_values) + + # Calculate microseconds per sample for all metrics (1/Hz * 1,000,000) + mean_us_per_sample = (1.0 / mean_hz) * 1_000_000 if mean_hz > 0 else 0 + min_us_per_sample = ( + (1.0 / max_hz) * 1_000_000 if max_hz > 0 else 0 + ) # Min time = Max Hz + max_us_per_sample = ( + (1.0 / min_hz) * 1_000_000 if min_hz > 0 else 0 + ) # Max time = Min Hz + + # Clear line and print stats + print( + f"\r\033[K{ANSIColors.BOLD_BLUE}Real-time sampling stats:{ANSIColors.RESET} " + f"{ANSIColors.YELLOW}Mean: {mean_hz:.1f}Hz ({mean_us_per_sample:.2f}µs){ANSIColors.RESET} " + f"{ANSIColors.GREEN}Min: {min_hz:.1f}Hz ({max_us_per_sample:.2f}µs){ANSIColors.RESET} " + f"{ANSIColors.RED}Max: {max_hz:.1f}Hz ({min_us_per_sample:.2f}µs){ANSIColors.RESET} " + f"{ANSIColors.CYAN}Samples: {self.total_samples}{ANSIColors.RESET}", + end="", + flush=True, + ) + + +def _determine_best_unit(max_value): + """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" + if max_value >= 1.0: + return "s", 1.0 + elif max_value >= 0.001: + return "ms", 1000.0 + else: + return "μs", 1000000.0 + + +def print_sampled_stats( + stats, sort=-1, limit=None, show_summary=True, sample_interval_usec=100 +): + # Get the stats data + stats_list = [] + for func, ( + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats.stats.items(): + stats_list.append( + ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) + ) + + # Calculate total samples for percentage calculations (using direct_calls) + total_samples = sum( + direct_calls for _, direct_calls, _, _, _, _ in stats_list + ) + + # Sort based on the requested field + sort_field = sort + if sort_field == -1: # stdname + stats_list.sort(key=lambda x: str(x[0])) + elif sort_field == 0: # nsamples (direct samples) + stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls + elif sort_field == 1: # tottime + stats_list.sort(key=lambda x: x[3], reverse=True) # total_time + elif sort_field == 2: # cumtime + stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time + elif sort_field == 3: # sample% + stats_list.sort( + key=lambda x: (x[1] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # direct_calls percentage + ) + elif sort_field == 4: # cumul% + stats_list.sort( + key=lambda x: (x[2] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # cumulative_calls percentage + ) + elif sort_field == 5: # nsamples (cumulative samples) + stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls + + # Apply limit if specified + if limit is not None: + stats_list = stats_list[:limit] + + # Determine the best unit for time columns based on maximum values + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in stats_list), default=0 + ) + max_cumulative_time = max( + (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), + default=0, + ) + + total_time_unit, total_time_scale = _determine_best_unit(max_total_time) + cumulative_time_unit, cumulative_time_scale = _determine_best_unit( + max_cumulative_time + ) + + # Define column widths for consistent alignment + col_widths = { + "nsamples": 15, # "nsamples" column (inline/cumulative format) + "sample_pct": 8, # "sample%" column + "tottime": max(12, len(f"tottime ({total_time_unit})")), + "cum_pct": 8, # "cumul%" column + "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), + } + + # Print header with colors and proper alignment + print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") + + header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" + header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" + header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" + header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" + header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" + header_filename = ( + f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" + ) + + print( + f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" + ) + + # Print each line with proper alignment + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Calculate percentages + sample_pct = ( + (direct_calls / total_samples * 100) if total_samples > 0 else 0 + ) + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + + # Format values with proper alignment - always use A/B format + nsamples_str = f"{direct_calls}/{cumulative_calls}" + nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" + sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" + tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" + cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" + cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" + + # Format the function name with colors + func_name = ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + # Print the formatted line with consistent spacing + print( + f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" + ) + + # Print legend + print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") + print( + f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" + ) + print( + f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" + ) + print( + f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" + ) + print( + f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" + ) + print( + f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" + ) + print( + f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" + ) + + def _format_func_name(func): + """Format function name with colors.""" + return ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + def _print_top_functions(stats_list, title, key_func, format_line, n=3): + """Print top N functions sorted by key_func with formatted output.""" + print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") + sorted_stats = sorted(stats_list, key=key_func, reverse=True) + for stat in sorted_stats[:n]: + if line := format_line(stat): + print(f" {line}") + + # Print summary of interesting functions if enabled + if show_summary and stats_list: + print( + f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" + ) + + # Aggregate stats by fully qualified function name (ignoring line numbers) + func_aggregated = {} + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Use filename:function_name as the key to get fully qualified name + qualified_name = f"{func[0]}:{func[2]}" + if qualified_name not in func_aggregated: + func_aggregated[qualified_name] = [ + 0, + 0, + 0, + 0, + ] # direct_calls, cumulative_calls, total_time, cumulative_time + func_aggregated[qualified_name][0] += direct_calls + func_aggregated[qualified_name][1] += cumulative_calls + func_aggregated[qualified_name][2] += total_time + func_aggregated[qualified_name][3] += cumulative_time + + # Convert aggregated data back to list format for processing + aggregated_stats = [] + for qualified_name, ( + prim_calls, + total_calls, + total_time, + cumulative_time, + ) in func_aggregated.items(): + # Parse the qualified name back to filename and function name + if ":" in qualified_name: + filename, func_name = qualified_name.rsplit(":", 1) + else: + filename, func_name = "", qualified_name + # Create a dummy func tuple with filename and function name for display + dummy_func = (filename, "", func_name) + aggregated_stats.append( + ( + dummy_func, + prim_calls, + total_calls, + total_time, + cumulative_time, + {}, + ) + ) + + # Determine best units for summary metrics + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in aggregated_stats), + default=0, + ) + max_cumulative_time = max( + ( + cumulative_time + for _, _, _, _, cumulative_time, _ in aggregated_stats + ), + default=0, + ) + + total_unit, total_scale = _determine_best_unit(max_total_time) + cumulative_unit, cumulative_scale = _determine_best_unit( + max_cumulative_time + ) + + # Functions with highest direct/cumulative ratio (hot spots) + def format_hotspots(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > 0: + ratio = direct_calls / cumulative_calls + direct_pct = ( + (direct_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{ratio:.3f} direct/cumulative ratio, " + f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Direct/Cumulative Ratio (Hot Spots)", + key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, + format_line=format_hotspots, + ) + + # Functions with highest call frequency (cumulative/direct difference) + def format_call_frequency(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if cumulative_calls > direct_calls: + call_frequency = cumulative_calls - direct_calls + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{call_frequency:d} indirect calls, " + f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Frequency (Indirect Calls)", + key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) + format_line=format_call_frequency, + ) + + # Functions with highest cumulative-to-direct multiplier (call magnification) + def format_call_magnification(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > direct_calls: + multiplier = cumulative_calls / direct_calls + indirect_calls = cumulative_calls - direct_calls + return ( + f"{multiplier:.1f}x call magnification, " + f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Magnification (Cumulative/Direct)", + key_func=lambda x: (x[2] / x[1]) + if x[1] > 0 + else 0, # Sort by cumulative/direct ratio + format_line=format_call_magnification, + ) + + +def sample( + pid, + *, + sort=2, + sample_interval_usec=100, + duration_sec=10, + filename=None, + all_threads=False, + limit=None, + show_summary=True, + output_format="pstats", + realtime_stats=False, +): + profiler = SampleProfiler( + pid, sample_interval_usec, all_threads=all_threads + ) + profiler.realtime_stats = realtime_stats + + collector = None + match output_format: + case "pstats": + collector = PstatsCollector(sample_interval_usec) + case "collapsed": + collector = CollapsedStackCollector() + filename = filename or f"collapsed.{pid}.txt" + case _: + raise ValueError(f"Invalid output format: {output_format}") + + profiler.sample(collector, duration_sec) + + if output_format == "pstats" and not filename: + stats = pstats.SampledStats(collector).strip_dirs() + print_sampled_stats( + stats, sort, limit, show_summary, sample_interval_usec + ) + else: + collector.export(filename) + + +def _validate_collapsed_format_args(args, parser): + # Check for incompatible pstats options + invalid_opts = [] + + # Get list of pstats-specific options + pstats_options = {"sort": None, "limit": None, "no_summary": False} + + # Find the default values from the argument definitions + for action in parser._actions: + if action.dest in pstats_options and hasattr(action, "default"): + pstats_options[action.dest] = action.default + + # Check if any pstats-specific options were provided by comparing with defaults + for opt, default in pstats_options.items(): + if getattr(args, opt) != default: + invalid_opts.append(opt.replace("no_", "")) + + if invalid_opts: + parser.error( + f"The following options are only valid with --pstats format: {', '.join(invalid_opts)}" + ) + + # Set default output filename for collapsed format + if not args.outfile: + args.outfile = f"collapsed.{args.pid}.txt" + + +def main(): + # Create the main parser + parser = argparse.ArgumentParser( + description=( + "Sample a process's stack frames and generate profiling data.\n" + "Supports two output formats:\n" + " - pstats: Detailed profiling statistics with sorting options\n" + " - collapsed: Stack traces for generating flamegraphs\n" + "\n" + "Examples:\n" + " # Profile process 1234 for 10 seconds with default settings\n" + " python -m profile.sample 1234\n" + "\n" + " # Profile with custom interval and duration, save to file\n" + " python -m profile.sample -i 50 -d 30 -o profile.stats 1234\n" + "\n" + " # Generate collapsed stacks for flamegraph\n" + " python -m profile.sample --collapsed 1234\n" + "\n" + " # Profile all threads, sort by total time\n" + " python -m profile.sample -a --sort-tottime 1234\n" + "\n" + " # Profile for 1 minute with 1ms sampling interval\n" + " python -m profile.sample -i 1000 -d 60 1234\n" + "\n" + " # Show only top 20 functions sorted by direct samples\n" + " python -m profile.sample --sort-nsamples -l 20 1234\n" + "\n" + " # Profile all threads and save collapsed stacks\n" + " python -m profile.sample -a --collapsed -o stacks.txt 1234\n" + "\n" + " # Profile with real-time sampling statistics\n" + " python -m profile.sample --realtime-stats 1234\n" + "\n" + " # Sort by sample percentage to find most sampled functions\n" + " python -m profile.sample --sort-sample-pct 1234\n" + "\n" + " # Sort by cumulative samples to find functions most on call stack\n" + " python -m profile.sample --sort-nsamples-cumul 1234" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + # Required arguments + parser.add_argument("pid", type=int, help="Process ID to sample") + + # Sampling options + sampling_group = parser.add_argument_group("Sampling configuration") + sampling_group.add_argument( + "-i", + "--interval", + type=int, + default=100, + help="Sampling interval in microseconds (default: 100)", + ) + sampling_group.add_argument( + "-d", + "--duration", + type=int, + default=10, + help="Sampling duration in seconds (default: 10)", + ) + sampling_group.add_argument( + "-a", + "--all-threads", + action="store_true", + help="Sample all threads in the process instead of just the main thread", + ) + sampling_group.add_argument( + "--realtime-stats", + action="store_true", + default=False, + help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", + ) + + # Output format selection + output_group = parser.add_argument_group("Output options") + output_format = output_group.add_mutually_exclusive_group() + output_format.add_argument( + "--pstats", + action="store_const", + const="pstats", + dest="format", + default="pstats", + help="Generate pstats output (default)", + ) + output_format.add_argument( + "--collapsed", + action="store_const", + const="collapsed", + dest="format", + help="Generate collapsed stack traces for flamegraphs", + ) + + output_group.add_argument( + "-o", + "--outfile", + help="Save output to a file (if omitted, prints to stdout for pstats, " + "or saves to collapsed..txt for collapsed format)", + ) + + # pstats-specific options + pstats_group = parser.add_argument_group("pstats format options") + sort_group = pstats_group.add_mutually_exclusive_group() + sort_group.add_argument( + "--sort-nsamples", + action="store_const", + const=0, + dest="sort", + help="Sort by number of direct samples (nsamples column)", + ) + sort_group.add_argument( + "--sort-tottime", + action="store_const", + const=1, + dest="sort", + help="Sort by total time (tottime column)", + ) + sort_group.add_argument( + "--sort-cumtime", + action="store_const", + const=2, + dest="sort", + help="Sort by cumulative time (cumtime column, default)", + ) + sort_group.add_argument( + "--sort-sample-pct", + action="store_const", + const=3, + dest="sort", + help="Sort by sample percentage (sample%% column)", + ) + sort_group.add_argument( + "--sort-cumul-pct", + action="store_const", + const=4, + dest="sort", + help="Sort by cumulative sample percentage (cumul%% column)", + ) + sort_group.add_argument( + "--sort-nsamples-cumul", + action="store_const", + const=5, + dest="sort", + help="Sort by cumulative samples (nsamples column, cumulative part)", + ) + sort_group.add_argument( + "--sort-name", + action="store_const", + const=-1, + dest="sort", + help="Sort by function name", + ) + + pstats_group.add_argument( + "-l", + "--limit", + type=int, + help="Limit the number of rows in the output", + default=15, + ) + pstats_group.add_argument( + "--no-summary", + action="store_true", + help="Disable the summary section in the output", + ) + + args = parser.parse_args() + + # Validate format-specific arguments + if args.format == "collapsed": + _validate_collapsed_format_args(args, parser) + + sort_value = args.sort if args.sort is not None else 2 + + sample( + args.pid, + sample_interval_usec=args.interval, + duration_sec=args.duration, + filename=args.outfile, + all_threads=args.all_threads, + limit=args.limit, + sort=sort_value, + show_summary=not args.no_summary, + output_format=args.format, + realtime_stats=args.realtime_stats, + ) + + +if __name__ == "__main__": + main() diff --git a/Lib/profile/stack_collector.py b/Lib/profile/stack_collector.py new file mode 100644 index 00000000000000..97dbcb5b2fb4b2 --- /dev/null +++ b/Lib/profile/stack_collector.py @@ -0,0 +1,37 @@ +import collections +import os + +from .collector import Collector + + +class StackTraceCollector(Collector): + def __init__(self): + self.call_trees = [] + self.function_samples = collections.defaultdict(int) + + def collect(self, stack_frames): + for thread_id, frames in stack_frames: + if frames: + # Store the complete call stack (reverse order - root first) + call_tree = list(reversed(frames)) + self.call_trees.append(call_tree) + + # Count samples per function + for frame in frames: + self.function_samples[frame] += 1 + + +class CollapsedStackCollector(StackTraceCollector): + def export(self, filename): + stack_counter = collections.Counter() + for call_tree in self.call_trees: + # Call tree is already in root->leaf order + stack_str = ";".join( + f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree + ) + stack_counter[stack_str] += 1 + + with open(filename, "w") as f: + for stack, count in stack_counter.items(): + f.write(f"{stack} {count}\n") + print(f"Collapsed stack output written to {filename}") diff --git a/Lib/pstats.py b/Lib/pstats.py index becaf35580eaee..079abd2c1b81df 100644 --- a/Lib/pstats.py +++ b/Lib/pstats.py @@ -139,7 +139,11 @@ def load_stats(self, arg): return elif isinstance(arg, str): with open(arg, 'rb') as f: - self.stats = marshal.load(f) + stats = marshal.load(f) + if (('__sampled__',)) in stats: + stats.pop((('__sampled__',))) + self.__class__ = SampledStats + self.stats = stats try: file_stats = os.stat(arg) arg = time.ctime(file_stats.st_mtime) + " " + arg @@ -467,7 +471,10 @@ def print_call_heading(self, name_size, column_title): subheader = isinstance(value, tuple) break if subheader: - print(" "*name_size + " ncalls tottime cumtime", file=self.stream) + self.print_call_subheading(name_size) + + def print_call_subheading(self, name_size): + print(" "*name_size + " ncalls tottime cumtime", file=self.stream) def print_call_line(self, name_size, source, call_dict, arrow="->"): print(func_std_string(source).ljust(name_size) + arrow, end=' ', file=self.stream) @@ -516,6 +523,35 @@ def print_line(self, func): # hack: should print percentages print(f8(ct/cc), end=' ', file=self.stream) print(func_std_string(func), file=self.stream) + +class SampledStats(Stats): + def __init__(self, *args, stream=None): + super().__init__(*args, stream=stream) + + self.sort_arg_dict = { + "samples" : (((1,-1), ), "sample count"), + "nsamples" : (((1,-1), ), "sample count"), + "cumtime" : (((3,-1), ), "cumulative time"), + "cumulative": (((3,-1), ), "cumulative time"), + "filename" : (((4, 1), ), "file name"), + "line" : (((5, 1), ), "line number"), + "module" : (((4, 1), ), "file name"), + "name" : (((6, 1), ), "function name"), + "nfl" : (((6, 1),(4, 1),(5, 1),), "name/file/line"), + "psamples" : (((0,-1), ), "primitive call count"), + "stdname" : (((7, 1), ), "standard name"), + "time" : (((2,-1), ), "internal time"), + "tottime" : (((2,-1), ), "internal time"), + } + + def print_call_subheading(self, name_size): + print(" "*name_size + " nsamples tottime cumtime", file=self.stream) + + def print_title(self): + print(' nsamples tottime persample cumtime persample', end=' ', file=self.stream) + print('filename:lineno(function)', file=self.stream) + + class TupleComp: """This class provides a generic function for comparing any two tuples. Each instance records a list of tuple-indices (from most significant @@ -607,6 +643,24 @@ def f8(x): # Statistics browser added by ESR, April 2001 #************************************************************************** +class StatsLoaderShim: + """Compatibility shim implementing 'create_stats' needed by Stats classes + to handle already unmarshalled data.""" + def __init__(self, raw_stats): + self.stats = raw_stats + + def create_stats(self): + pass + +def stats_factory(raw_stats): + """Return a Stats or SampledStats instance based on the marker in raw_stats.""" + if (('__sampled__',)) in raw_stats: + raw_stats = dict(raw_stats) # avoid mutating caller's dict + raw_stats.pop((('__sampled__',))) + return SampledStats(StatsLoaderShim(raw_stats)) + else: + return Stats(StatsLoaderShim(raw_stats)) + if __name__ == '__main__': import cmd try: @@ -693,7 +747,15 @@ def help_quit(self): def do_read(self, line): if line: try: - self.stats = Stats(line) + with open(line, 'rb') as f: + raw_stats = marshal.load(f) + self.stats = stats_factory(raw_stats) + try: + file_stats = os.stat(line) + arg = time.ctime(file_stats.st_mtime) + " " + line + except Exception: + arg = line + self.stats.files = [arg] except OSError as err: print(err.args[1], file=self.stream) return diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index a709b837161f48..354a82a800fd21 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1257,5 +1257,21 @@ def main_work(): ) +class TestUnsupportedPlatformHandling(unittest.TestCase): + @unittest.skipIf( + sys.platform in ("linux", "darwin", "win32"), + "Test only runs on unsupported platforms (not Linux, macOS, or Windows)", + ) + @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + def test_unsupported_platform_error(self): + with self.assertRaises(RuntimeError) as cm: + RemoteUnwinder(os.getpid()) + + self.assertIn( + "Reading the PyRuntime section is not supported on this platform", + str(cm.exception) + ) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sample_profiler.py b/Lib/test/test_sample_profiler.py new file mode 100644 index 00000000000000..2c7fa1cba712c9 --- /dev/null +++ b/Lib/test/test_sample_profiler.py @@ -0,0 +1,1877 @@ +"""Tests for the sampling profiler (profile.sample).""" + +import contextlib +import io +import marshal +import os +import socket +import subprocess +import sys +import tempfile +import unittest +from unittest import mock + +from profile.pstats_collector import PstatsCollector +from profile.stack_collector import ( + CollapsedStackCollector, +) + +from test.support.os_helper import unlink +from test.support import force_not_colorized_test_class, SHORT_TIMEOUT +from test.support.socket_helper import find_unused_port +from test.support import requires_subprocess + +PROCESS_VM_READV_SUPPORTED = False + +try: + from _remote_debugging import PROCESS_VM_READV_SUPPORTED + import _remote_debugging +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) +else: + import profile.sample + from profile.sample import SampleProfiler + + + +class MockFrameInfo: + """Mock FrameInfo for testing since the real one isn't accessible.""" + + def __init__(self, filename, lineno, funcname): + self.filename = filename + self.lineno = lineno + self.funcname = funcname + + def __repr__(self): + return f"MockFrameInfo(filename='{self.filename}', lineno={self.lineno}, funcname='{self.funcname}')" + + +skip_if_not_supported = unittest.skipIf( + ( + sys.platform != "darwin" + and sys.platform != "linux" + and sys.platform != "win32" + ), + "Test only runs on Linux, Windows and MacOS", +) + + +@contextlib.contextmanager +def test_subprocess(script): + # Find an unused port for socket communication + port = find_unused_port() + + # Inject socket connection code at the beginning of the script + socket_code = f''' +import socket +_test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +_test_sock.connect(('localhost', {port})) +_test_sock.sendall(b"ready") +''' + + # Combine socket code with user script + full_script = socket_code + script + + # Create server socket to wait for process to be ready + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + + proc = subprocess.Popen( + [sys.executable, "-c", full_script], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + client_socket = None + try: + # Wait for process to connect and send ready signal + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + if response != b"ready": + raise RuntimeError(f"Unexpected response from subprocess: {response}") + + yield proc + finally: + if client_socket is not None: + client_socket.close() + if proc.poll() is None: + proc.kill() + proc.wait() + + +def close_and_unlink(file): + file.close() + unlink(file.name) + + +class TestSampleProfilerComponents(unittest.TestCase): + """Unit tests for individual profiler components.""" + + def test_mock_frame_info_with_empty_and_unicode_values(self): + """Test MockFrameInfo handles empty strings, unicode characters, and very long names correctly.""" + # Test with empty strings + frame = MockFrameInfo("", 0, "") + self.assertEqual(frame.filename, "") + self.assertEqual(frame.lineno, 0) + self.assertEqual(frame.funcname, "") + self.assertIn("filename=''", repr(frame)) + + # Test with unicode characters + frame = MockFrameInfo("文件.py", 42, "函数名") + self.assertEqual(frame.filename, "文件.py") + self.assertEqual(frame.funcname, "函数名") + + # Test with very long names + long_filename = "x" * 1000 + ".py" + long_funcname = "func_" + "x" * 1000 + frame = MockFrameInfo(long_filename, 999999, long_funcname) + self.assertEqual(frame.filename, long_filename) + self.assertEqual(frame.lineno, 999999) + self.assertEqual(frame.funcname, long_funcname) + + def test_pstats_collector_with_extreme_intervals_and_empty_data(self): + """Test PstatsCollector handles zero/large intervals, empty frames, None thread IDs, and duplicate frames.""" + # Test with zero interval + collector = PstatsCollector(sample_interval_usec=0) + self.assertEqual(collector.sample_interval_usec, 0) + + # Test with very large interval + collector = PstatsCollector(sample_interval_usec=1000000000) + self.assertEqual(collector.sample_interval_usec, 1000000000) + + # Test collecting empty frames list + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect([]) + self.assertEqual(len(collector.result), 0) + + # Test collecting frames with None thread id + test_frames = [(None, [MockFrameInfo("file.py", 10, "func")])] + collector.collect(test_frames) + # Should still process the frames + self.assertEqual(len(collector.result), 1) + + # Test collecting duplicate frames in same sample + test_frames = [ + ( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 10, "func1"), # Duplicate + ], + ) + ] + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect(test_frames) + # Should count both occurrences + self.assertEqual( + collector.result[("file.py", 10, "func1")]["cumulative_calls"], 2 + ) + + def test_pstats_collector_single_frame_stacks(self): + """Test PstatsCollector with single-frame call stacks to trigger len(frames) <= 1 branch.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Test with exactly one frame (should trigger the <= 1 condition) + single_frame = [(1, [MockFrameInfo("single.py", 10, "single_func")])] + collector.collect(single_frame) + + # Should record the single frame with inline call + self.assertEqual(len(collector.result), 1) + single_key = ("single.py", 10, "single_func") + self.assertIn(single_key, collector.result) + self.assertEqual(collector.result[single_key]["direct_calls"], 1) + self.assertEqual(collector.result[single_key]["cumulative_calls"], 1) + + # Test with empty frames (should also trigger <= 1 condition) + empty_frames = [(1, [])] + collector.collect(empty_frames) + + # Should not add any new entries + self.assertEqual( + len(collector.result), 1 + ) # Still just the single frame + + # Test mixed single and multi-frame stacks + mixed_frames = [ + ( + 1, + [MockFrameInfo("single2.py", 20, "single_func2")], + ), # Single frame + ( + 2, + [ # Multi-frame stack + MockFrameInfo("multi.py", 30, "multi_func1"), + MockFrameInfo("multi.py", 40, "multi_func2"), + ], + ), + ] + collector.collect(mixed_frames) + + # Should have recorded all functions + self.assertEqual( + len(collector.result), 4 + ) # single + single2 + multi1 + multi2 + + # Verify single frame handling + single2_key = ("single2.py", 20, "single_func2") + self.assertIn(single2_key, collector.result) + self.assertEqual(collector.result[single2_key]["direct_calls"], 1) + self.assertEqual(collector.result[single2_key]["cumulative_calls"], 1) + + # Verify multi-frame handling still works + multi1_key = ("multi.py", 30, "multi_func1") + multi2_key = ("multi.py", 40, "multi_func2") + self.assertIn(multi1_key, collector.result) + self.assertIn(multi2_key, collector.result) + self.assertEqual(collector.result[multi1_key]["direct_calls"], 1) + self.assertEqual( + collector.result[multi2_key]["cumulative_calls"], 1 + ) # Called from multi1 + + def test_collapsed_stack_collector_with_empty_and_deep_stacks(self): + """Test CollapsedStackCollector handles empty frames, single-frame stacks, and very deep call stacks.""" + collector = CollapsedStackCollector() + + # Test with empty frames + collector.collect([]) + self.assertEqual(len(collector.call_trees), 0) + + # Test with single frame stack + test_frames = [(1, [("file.py", 10, "func")])] + collector.collect(test_frames) + self.assertEqual(len(collector.call_trees), 1) + self.assertEqual(collector.call_trees[0], [("file.py", 10, "func")]) + + # Test with very deep stack + deep_stack = [(f"file{i}.py", i, f"func{i}") for i in range(100)] + test_frames = [(1, deep_stack)] + collector = CollapsedStackCollector() + collector.collect(test_frames) + self.assertEqual(len(collector.call_trees[0]), 100) + # Check it's properly reversed + self.assertEqual( + collector.call_trees[0][0], ("file99.py", 99, "func99") + ) + self.assertEqual(collector.call_trees[0][-1], ("file0.py", 0, "func0")) + + def test_pstats_collector_basic(self): + """Test basic PstatsCollector functionality.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Test empty state + self.assertEqual(len(collector.result), 0) + self.assertEqual(len(collector.stats), 0) + + # Test collecting sample data + test_frames = [ + ( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ] + collector.collect(test_frames) + + # Should have recorded calls for both functions + self.assertEqual(len(collector.result), 2) + self.assertIn(("file.py", 10, "func1"), collector.result) + self.assertIn(("file.py", 20, "func2"), collector.result) + + # Top-level function should have direct call + self.assertEqual( + collector.result[("file.py", 10, "func1")]["direct_calls"], 1 + ) + self.assertEqual( + collector.result[("file.py", 10, "func1")]["cumulative_calls"], 1 + ) + + # Calling function should have cumulative call but no direct calls + self.assertEqual( + collector.result[("file.py", 20, "func2")]["cumulative_calls"], 1 + ) + self.assertEqual( + collector.result[("file.py", 20, "func2")]["direct_calls"], 0 + ) + + def test_pstats_collector_create_stats(self): + """Test PstatsCollector stats creation.""" + collector = PstatsCollector( + sample_interval_usec=1000000 + ) # 1 second intervals + + test_frames = [ + ( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ] + collector.collect(test_frames) + collector.collect(test_frames) # Collect twice + + collector.create_stats() + + # Check stats format: (direct_calls, cumulative_calls, tt, ct, callers) + func1_stats = collector.stats[("file.py", 10, "func1")] + self.assertEqual(func1_stats[0], 2) # direct_calls (top of stack) + self.assertEqual(func1_stats[1], 2) # cumulative_calls + self.assertEqual( + func1_stats[2], 2.0 + ) # tt (total time - 2 samples * 1 sec) + self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) + + func2_stats = collector.stats[("file.py", 20, "func2")] + self.assertEqual( + func2_stats[0], 0 + ) # direct_calls (never top of stack) + self.assertEqual( + func2_stats[1], 2 + ) # cumulative_calls (appears in stack) + self.assertEqual(func2_stats[2], 0.0) # tt (no direct calls) + self.assertEqual(func2_stats[3], 2.0) # ct (cumulative time) + + def test_collapsed_stack_collector_basic(self): + collector = CollapsedStackCollector() + + # Test empty state + self.assertEqual(len(collector.call_trees), 0) + self.assertEqual(len(collector.function_samples), 0) + + # Test collecting sample data + test_frames = [ + (1, [("file.py", 10, "func1"), ("file.py", 20, "func2")]) + ] + collector.collect(test_frames) + + # Should store call tree (reversed) + self.assertEqual(len(collector.call_trees), 1) + expected_tree = [("file.py", 20, "func2"), ("file.py", 10, "func1")] + self.assertEqual(collector.call_trees[0], expected_tree) + + # Should count function samples + self.assertEqual( + collector.function_samples[("file.py", 10, "func1")], 1 + ) + self.assertEqual( + collector.function_samples[("file.py", 20, "func2")], 1 + ) + + def test_collapsed_stack_collector_export(self): + collapsed_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, collapsed_out) + + collector = CollapsedStackCollector() + + test_frames1 = [ + (1, [("file.py", 10, "func1"), ("file.py", 20, "func2")]) + ] + test_frames2 = [ + (1, [("file.py", 10, "func1"), ("file.py", 20, "func2")]) + ] # Same stack + test_frames3 = [(1, [("other.py", 5, "other_func")])] + + collector.collect(test_frames1) + collector.collect(test_frames2) + collector.collect(test_frames3) + + collector.export(collapsed_out.name) + # Check file contents + with open(collapsed_out.name, "r") as f: + content = f.read() + + lines = content.strip().split("\n") + self.assertEqual(len(lines), 2) # Two unique stacks + + # Check collapsed format: file:func:line;file:func:line count + stack1_expected = "file.py:func2:20;file.py:func1:10 2" + stack2_expected = "other.py:other_func:5 1" + + self.assertIn(stack1_expected, lines) + self.assertIn(stack2_expected, lines) + + def test_pstats_collector_export(self): + collector = PstatsCollector( + sample_interval_usec=1000000 + ) # 1 second intervals + + test_frames1 = [ + ( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ] + test_frames2 = [ + ( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ] # Same stack + test_frames3 = [(1, [MockFrameInfo("other.py", 5, "other_func")])] + + collector.collect(test_frames1) + collector.collect(test_frames2) + collector.collect(test_frames3) + + pstats_out = tempfile.NamedTemporaryFile( + suffix=".pstats", delete=False + ) + self.addCleanup(close_and_unlink, pstats_out) + collector.export(pstats_out.name) + + # Check file can be loaded with marshal + with open(pstats_out.name, "rb") as f: + stats_data = marshal.load(f) + + # Should be a dictionary with the sampled marker + self.assertIsInstance(stats_data, dict) + self.assertIn(("__sampled__",), stats_data) + self.assertTrue(stats_data[("__sampled__",)]) + + # Should have function data + function_entries = [ + k for k in stats_data.keys() if k != ("__sampled__",) + ] + self.assertGreater(len(function_entries), 0) + + # Check specific function stats format: (cc, nc, tt, ct, callers) + func1_key = ("file.py", 10, "func1") + func2_key = ("file.py", 20, "func2") + other_key = ("other.py", 5, "other_func") + + self.assertIn(func1_key, stats_data) + self.assertIn(func2_key, stats_data) + self.assertIn(other_key, stats_data) + + # Check func1 stats (should have 2 samples) + func1_stats = stats_data[func1_key] + self.assertEqual(func1_stats[0], 2) # total_calls + self.assertEqual(func1_stats[1], 2) # nc (non-recursive calls) + self.assertEqual(func1_stats[2], 2.0) # tt (total time) + self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) + + +class TestSampleProfiler(unittest.TestCase): + """Test the SampleProfiler class.""" + + def test_sample_profiler_initialization(self): + """Test SampleProfiler initialization with various parameters.""" + from profile.sample import SampleProfiler + + # Mock RemoteUnwinder to avoid permission issues + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock.MagicMock() + + # Test basic initialization + profiler = SampleProfiler( + pid=12345, sample_interval_usec=1000, all_threads=False + ) + self.assertEqual(profiler.pid, 12345) + self.assertEqual(profiler.sample_interval_usec, 1000) + self.assertEqual(profiler.all_threads, False) + + # Test with all_threads=True + profiler = SampleProfiler( + pid=54321, sample_interval_usec=5000, all_threads=True + ) + self.assertEqual(profiler.pid, 54321) + self.assertEqual(profiler.sample_interval_usec, 5000) + self.assertEqual(profiler.all_threads, True) + + def test_sample_profiler_sample_method_timing(self): + """Test that the sample method respects duration and handles timing correctly.""" + from profile.sample import SampleProfiler + + # Mock the unwinder to avoid needing a real process + mock_unwinder = mock.MagicMock() + mock_unwinder.get_stack_trace.return_value = [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ] + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, sample_interval_usec=100000, all_threads=False + ) # 100ms interval + + # Mock collector + mock_collector = mock.MagicMock() + + # Mock time to control the sampling loop + start_time = 1000.0 + times = [ + start_time + i * 0.1 for i in range(12) + ] # 0, 0.1, 0.2, ..., 1.1 seconds + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=1) + + result = output.getvalue() + + # Should have captured approximately 10 samples (1 second / 0.1 second interval) + self.assertIn("Captured", result) + self.assertIn("samples", result) + + # Verify collector was called multiple times + self.assertGreaterEqual(mock_collector.collect.call_count, 5) + self.assertLessEqual(mock_collector.collect.call_count, 11) + + def test_sample_profiler_error_handling(self): + """Test that the sample method handles errors gracefully.""" + from profile.sample import SampleProfiler + + # Mock unwinder that raises errors + mock_unwinder = mock.MagicMock() + error_sequence = [ + RuntimeError("Process died"), + [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ], + UnicodeDecodeError("utf-8", b"", 0, 1, "invalid"), + [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", + lineno=20, + funcname="test_func2", + ) + ], + ) + ], + OSError("Permission denied"), + ] + mock_unwinder.get_stack_trace.side_effect = error_sequence + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, sample_interval_usec=10000, all_threads=False + ) + + mock_collector = mock.MagicMock() + + # Control timing to run exactly 5 samples + times = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06] + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=0.05) + + result = output.getvalue() + + # Should report error rate + self.assertIn("Error rate:", result) + self.assertIn("%", result) + + # Collector should have been called only for successful samples (should be > 0) + self.assertGreater(mock_collector.collect.call_count, 0) + self.assertLessEqual(mock_collector.collect.call_count, 3) + + def test_sample_profiler_missed_samples_warning(self): + """Test that the profiler warns about missed samples when sampling is too slow.""" + from profile.sample import SampleProfiler + + mock_unwinder = mock.MagicMock() + mock_unwinder.get_stack_trace.return_value = [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ] + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + # Use very short interval that we'll miss + profiler = SampleProfiler( + pid=12345, sample_interval_usec=1000, all_threads=False + ) # 1ms interval + + mock_collector = mock.MagicMock() + + # Simulate slow sampling where we miss many samples + times = [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + ] # Extra time points to avoid StopIteration + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=0.5) + + result = output.getvalue() + + # Should warn about missed samples + self.assertIn("Warning: missed", result) + self.assertIn("samples from the expected total", result) + + +@force_not_colorized_test_class +class TestPrintSampledStats(unittest.TestCase): + """Test the print_sampled_stats function.""" + + def setUp(self): + """Set up test data.""" + # Mock stats data + self.mock_stats = mock.MagicMock() + self.mock_stats.stats = { + ("file1.py", 10, "func1"): ( + 100, + 100, + 0.5, + 0.5, + {}, + ), # cc, nc, tt, ct, callers + ("file2.py", 20, "func2"): (50, 50, 0.25, 0.3, {}), + ("file3.py", 30, "func3"): (200, 200, 1.5, 2.0, {}), + ("file4.py", 40, "func4"): ( + 10, + 10, + 0.001, + 0.001, + {}, + ), # millisecond range + ("file5.py", 50, "func5"): ( + 5, + 5, + 0.000001, + 0.000002, + {}, + ), # microsecond range + } + + def test_print_sampled_stats_basic(self): + """Test basic print_sampled_stats functionality.""" + from profile.sample import print_sampled_stats + + # Capture output + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(self.mock_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Check header is present + self.assertIn("Profile Stats:", result) + self.assertIn("nsamples", result) + self.assertIn("tottime", result) + self.assertIn("cumtime", result) + + # Check functions are present + self.assertIn("func1", result) + self.assertIn("func2", result) + self.assertIn("func3", result) + + def test_print_sampled_stats_sorting(self): + """Test different sorting options.""" + from profile.sample import print_sampled_stats + + # Test sort by calls + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=0, sample_interval_usec=100 + ) + + result = output.getvalue() + lines = result.strip().split("\n") + + # Find the data lines (skip header) + data_lines = [l for l in lines if "file" in l and ".py" in l] + # func3 should be first (200 calls) + self.assertIn("func3", data_lines[0]) + + # Test sort by time + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=1, sample_interval_usec=100 + ) + + result = output.getvalue() + lines = result.strip().split("\n") + + data_lines = [l for l in lines if "file" in l and ".py" in l] + # func3 should be first (1.5s time) + self.assertIn("func3", data_lines[0]) + + def test_print_sampled_stats_limit(self): + """Test limiting output rows.""" + from profile.sample import print_sampled_stats + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, limit=2, sample_interval_usec=100 + ) + + result = output.getvalue() + + # Count function entries in the main stats section (not in summary) + lines = result.split("\n") + # Find where the main stats section ends (before summary) + main_section_lines = [] + for line in lines: + if "Summary of Interesting Functions:" in line: + break + main_section_lines.append(line) + + # Count function entries only in main section + func_count = sum( + 1 + for line in main_section_lines + if "func" in line and ".py" in line + ) + self.assertEqual(func_count, 2) + + def test_print_sampled_stats_time_units(self): + """Test proper time unit selection.""" + from profile.sample import print_sampled_stats + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(self.mock_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should use seconds for the header since max time is > 1s + self.assertIn("tottime (s)", result) + self.assertIn("cumtime (s)", result) + + # Test with only microsecond-range times + micro_stats = mock.MagicMock() + micro_stats.stats = { + ("file1.py", 10, "func1"): (100, 100, 0.000005, 0.000010, {}), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(micro_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should use microseconds + self.assertIn("tottime (μs)", result) + self.assertIn("cumtime (μs)", result) + + def test_print_sampled_stats_summary(self): + """Test summary section generation.""" + from profile.sample import print_sampled_stats + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Check summary sections are present + self.assertIn("Summary of Interesting Functions:", result) + self.assertIn( + "Functions with Highest Direct/Cumulative Ratio (Hot Spots):", + result, + ) + self.assertIn( + "Functions with Highest Call Frequency (Indirect Calls):", result + ) + self.assertIn( + "Functions with Highest Call Magnification (Cumulative/Direct):", + result, + ) + + def test_print_sampled_stats_no_summary(self): + """Test disabling summary output.""" + from profile.sample import print_sampled_stats + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, + show_summary=False, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Summary should not be present + self.assertNotIn("Summary of Interesting Functions:", result) + + def test_print_sampled_stats_empty_stats(self): + """Test with empty stats.""" + from profile.sample import print_sampled_stats + + empty_stats = mock.MagicMock() + empty_stats.stats = {} + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(empty_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should still print header + self.assertIn("Profile Stats:", result) + + def test_print_sampled_stats_sample_percentage_sorting(self): + """Test sample percentage sorting options.""" + from profile.sample import print_sampled_stats + + # Add a function with high sample percentage (more direct calls than func3's 200) + self.mock_stats.stats[("expensive.py", 60, "expensive_func")] = ( + 300, # direct calls (higher than func3's 200) + 300, # cumulative calls + 1.0, # total time + 1.0, # cumulative time + {}, + ) + + # Test sort by sample percentage + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=3, sample_interval_usec=100 + ) # sample percentage + + result = output.getvalue() + lines = result.strip().split("\n") + + data_lines = [l for l in lines if ".py" in l and "func" in l] + # expensive_func should be first (highest sample percentage) + self.assertIn("expensive_func", data_lines[0]) + + def test_print_sampled_stats_with_recursive_calls(self): + """Test print_sampled_stats with recursive calls where nc != cc.""" + from profile.sample import print_sampled_stats + + # Create stats with recursive calls (nc != cc) + recursive_stats = mock.MagicMock() + recursive_stats.stats = { + # (direct_calls, cumulative_calls, tt, ct, callers) - recursive function + ("recursive.py", 10, "factorial"): ( + 5, # direct_calls + 10, # cumulative_calls (appears more times in stack due to recursion) + 0.5, + 0.6, + {}, + ), + ("normal.py", 20, "normal_func"): ( + 3, # direct_calls + 3, # cumulative_calls (same as direct for non-recursive) + 0.2, + 0.2, + {}, + ), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(recursive_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should display recursive calls as "5/10" format + self.assertIn("5/10", result) # nc/cc format for recursive calls + self.assertIn("3", result) # just nc for non-recursive calls + self.assertIn("factorial", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_zero_call_counts(self): + """Test print_sampled_stats with zero call counts to trigger division protection.""" + from profile.sample import print_sampled_stats + + # Create stats with zero call counts + zero_stats = mock.MagicMock() + zero_stats.stats = { + ("file.py", 10, "zero_calls"): (0, 0, 0.0, 0.0, {}), # Zero calls + ("file.py", 20, "normal_func"): ( + 5, + 5, + 0.1, + 0.1, + {}, + ), # Normal function + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(zero_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should handle zero call counts gracefully + self.assertIn("zero_calls", result) + self.assertIn("zero_calls", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_sort_by_name(self): + """Test sort by function name option.""" + from profile.sample import print_sampled_stats + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=-1, sample_interval_usec=100 + ) # sort by name + + result = output.getvalue() + lines = result.strip().split("\n") + + # Find the data lines (skip header and summary) + # Data lines start with whitespace and numbers, and contain filename:lineno(function) + data_lines = [] + for line in lines: + # Skip header lines and summary sections + if ( + line.startswith(" ") + and "(" in line + and ")" in line + and not line.startswith( + " 1." + ) # Skip summary lines that start with times + and not line.startswith( + " 0." + ) # Skip summary lines that start with times + and not "per call" in line # Skip summary lines + and not "calls" in line # Skip summary lines + and not "total time" in line # Skip summary lines + and not "cumulative time" in line + ): # Skip summary lines + data_lines.append(line) + + # Extract just the function names for comparison + func_names = [] + import re + + for line in data_lines: + # Function name is between the last ( and ), accounting for ANSI color codes + match = re.search(r"\(([^)]+)\)$", line) + if match: + func_name = match.group(1) + # Remove ANSI color codes + func_name = re.sub(r"\x1b\[[0-9;]*m", "", func_name) + func_names.append(func_name) + + # Verify we extracted function names and they are sorted + self.assertGreater( + len(func_names), 0, "Should have extracted some function names" + ) + self.assertEqual( + func_names, + sorted(func_names), + f"Function names {func_names} should be sorted alphabetically", + ) + + def test_print_sampled_stats_with_zero_time_functions(self): + """Test summary sections with functions that have zero time.""" + from profile.sample import print_sampled_stats + + # Create stats with zero-time functions + zero_time_stats = mock.MagicMock() + zero_time_stats.stats = { + ("file1.py", 10, "zero_time_func"): ( + 5, + 5, + 0.0, + 0.0, + {}, + ), # Zero time + ("file2.py", 20, "normal_func"): ( + 3, + 3, + 0.1, + 0.1, + {}, + ), # Normal time + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + zero_time_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Should handle zero-time functions gracefully in summary + self.assertIn("Summary of Interesting Functions:", result) + self.assertIn("zero_time_func", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_malformed_qualified_names(self): + """Test summary generation with function names that don't contain colons.""" + from profile.sample import print_sampled_stats + + # Create stats with function names that would create malformed qualified names + malformed_stats = mock.MagicMock() + malformed_stats.stats = { + # Function name without clear module separation + ("no_colon_func", 10, "func"): (3, 3, 0.1, 0.1, {}), + ("", 20, "empty_filename_func"): (2, 2, 0.05, 0.05, {}), + ("normal.py", 30, "normal_func"): (5, 5, 0.2, 0.2, {}), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + malformed_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Should handle malformed names gracefully in summary aggregation + self.assertIn("Summary of Interesting Functions:", result) + # All function names should appear somewhere in the output + self.assertIn("func", result) + self.assertIn("empty_filename_func", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_recursive_call_stats_creation(self): + """Test create_stats with recursive call data to trigger total_rec_calls branch.""" + collector = PstatsCollector(sample_interval_usec=1000000) # 1 second + + # Simulate recursive function data where total_rec_calls would be set + # We need to manually manipulate the collector result to test this branch + collector.result = { + ("recursive.py", 10, "factorial"): { + "total_rec_calls": 3, # Non-zero recursive calls + "direct_calls": 5, + "cumulative_calls": 10, + }, + ("normal.py", 20, "normal_func"): { + "total_rec_calls": 0, # Zero recursive calls + "direct_calls": 2, + "cumulative_calls": 5, + }, + } + + collector.create_stats() + + # Check that recursive calls are handled differently from non-recursive + factorial_stats = collector.stats[("recursive.py", 10, "factorial")] + normal_stats = collector.stats[("normal.py", 20, "normal_func")] + + # factorial should use cumulative_calls (10) as nc + self.assertEqual( + factorial_stats[1], 10 + ) # nc should be cumulative_calls + self.assertEqual(factorial_stats[0], 5) # cc should be direct_calls + + # normal_func should use cumulative_calls as nc + self.assertEqual(normal_stats[1], 5) # nc should be cumulative_calls + self.assertEqual(normal_stats[0], 2) # cc should be direct_calls + + +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestRecursiveFunctionProfiling(unittest.TestCase): + """Test profiling of recursive functions and complex call patterns.""" + + def test_recursive_function_call_counting(self): + """Test that recursive function calls are counted correctly.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate a recursive call pattern: fibonacci(5) calling itself + recursive_frames = [ + ( + 1, + [ # First sample: deep in recursion + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), # recursive call + MockFrameInfo( + "fib.py", 10, "fibonacci" + ), # deeper recursion + MockFrameInfo("fib.py", 10, "fibonacci"), # even deeper + MockFrameInfo("main.py", 5, "main"), # main caller + ], + ), + ( + 1, + [ # Second sample: different recursion depth + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), # recursive call + MockFrameInfo("main.py", 5, "main"), # main caller + ], + ), + ( + 1, + [ # Third sample: back to deeper recursion + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + ] + + for frames in recursive_frames: + collector.collect([frames]) + + collector.create_stats() + + # Check that recursive calls are counted properly + fib_key = ("fib.py", 10, "fibonacci") + main_key = ("main.py", 5, "main") + + self.assertIn(fib_key, collector.stats) + self.assertIn(main_key, collector.stats) + + # Fibonacci should have many calls due to recursion + fib_stats = collector.stats[fib_key] + direct_calls, cumulative_calls, tt, ct, callers = fib_stats + + # Should have recorded multiple calls (9 total appearances in samples) + self.assertEqual(cumulative_calls, 9) + self.assertGreater(tt, 0) # Should have some total time + self.assertGreater(ct, 0) # Should have some cumulative time + + # Main should have fewer calls + main_stats = collector.stats[main_key] + main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1] + self.assertEqual(main_direct_calls, 0) # Never directly executing + self.assertEqual(main_cumulative_calls, 3) # Appears in all 3 samples + + def test_nested_function_hierarchy(self): + """Test profiling of deeply nested function calls.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate a deep call hierarchy + deep_call_frames = [ + ( + 1, + [ + MockFrameInfo("level1.py", 10, "level1_func"), + MockFrameInfo("level2.py", 20, "level2_func"), + MockFrameInfo("level3.py", 30, "level3_func"), + MockFrameInfo("level4.py", 40, "level4_func"), + MockFrameInfo("level5.py", 50, "level5_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + ( + 1, + [ # Same hierarchy sampled again + MockFrameInfo("level1.py", 10, "level1_func"), + MockFrameInfo("level2.py", 20, "level2_func"), + MockFrameInfo("level3.py", 30, "level3_func"), + MockFrameInfo("level4.py", 40, "level4_func"), + MockFrameInfo("level5.py", 50, "level5_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + ] + + for frames in deep_call_frames: + collector.collect([frames]) + + collector.create_stats() + + # All levels should be recorded + for level in range(1, 6): + key = (f"level{level}.py", level * 10, f"level{level}_func") + self.assertIn(key, collector.stats) + + stats = collector.stats[key] + direct_calls, cumulative_calls, tt, ct, callers = stats + + # Each level should appear in stack twice (2 samples) + self.assertEqual(cumulative_calls, 2) + + # Only level1 (deepest) should have direct calls + if level == 1: + self.assertEqual(direct_calls, 2) + else: + self.assertEqual(direct_calls, 0) + + # Deeper levels should have lower cumulative time than higher levels + # (since they don't include time from functions they call) + if level == 1: # Deepest level with most time + self.assertGreater(ct, 0) + + def test_alternating_call_patterns(self): + """Test profiling with alternating call patterns.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate alternating execution paths + pattern_frames = [ + # Pattern A: path through func_a + ( + 1, + [ + MockFrameInfo("module.py", 10, "func_a"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + # Pattern B: path through func_b + ( + 1, + [ + MockFrameInfo("module.py", 20, "func_b"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + # Pattern A again + ( + 1, + [ + MockFrameInfo("module.py", 10, "func_a"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + # Pattern B again + ( + 1, + [ + MockFrameInfo("module.py", 20, "func_b"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ), + ] + + for frames in pattern_frames: + collector.collect([frames]) + + collector.create_stats() + + # Check that both paths are recorded equally + func_a_key = ("module.py", 10, "func_a") + func_b_key = ("module.py", 20, "func_b") + shared_key = ("module.py", 30, "shared_func") + main_key = ("main.py", 5, "main") + + # func_a and func_b should each be directly executing twice + self.assertEqual(collector.stats[func_a_key][0], 2) # direct_calls + self.assertEqual(collector.stats[func_a_key][1], 2) # cumulative_calls + self.assertEqual(collector.stats[func_b_key][0], 2) # direct_calls + self.assertEqual(collector.stats[func_b_key][1], 2) # cumulative_calls + + # shared_func should appear in all samples (4 times) but never directly executing + self.assertEqual(collector.stats[shared_key][0], 0) # direct_calls + self.assertEqual(collector.stats[shared_key][1], 4) # cumulative_calls + + # main should appear in all samples but never directly executing + self.assertEqual(collector.stats[main_key][0], 0) # direct_calls + self.assertEqual(collector.stats[main_key][1], 4) # cumulative_calls + + def test_collapsed_stack_with_recursion(self): + """Test collapsed stack collector with recursive patterns.""" + collector = CollapsedStackCollector() + + # Recursive call pattern + recursive_frames = [ + ( + 1, + [ + ("factorial.py", 10, "factorial"), + ("factorial.py", 10, "factorial"), # recursive + ("factorial.py", 10, "factorial"), # deeper + ("main.py", 5, "main"), + ], + ), + ( + 1, + [ + ("factorial.py", 10, "factorial"), + ("factorial.py", 10, "factorial"), # different depth + ("main.py", 5, "main"), + ], + ), + ] + + for frames in recursive_frames: + collector.collect([frames]) + + # Should capture both call trees + self.assertEqual(len(collector.call_trees), 2) + + # First tree should be longer (deeper recursion) + tree1 = collector.call_trees[0] + tree2 = collector.call_trees[1] + + # Trees should be different lengths due to different recursion depths + self.assertNotEqual(len(tree1), len(tree2)) + + # Both should contain factorial calls + self.assertTrue(any("factorial" in str(frame) for frame in tree1)) + self.assertTrue(any("factorial" in str(frame) for frame in tree2)) + + # Function samples should count all occurrences + factorial_key = ("factorial.py", 10, "factorial") + main_key = ("main.py", 5, "main") + + # factorial appears 5 times total (3 + 2) + self.assertEqual(collector.function_samples[factorial_key], 5) + # main appears 2 times total + self.assertEqual(collector.function_samples[main_key], 2) + + +@requires_subprocess() +@skip_if_not_supported +class TestSampleProfilerIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.test_script = ''' +import time +import os + +def slow_fibonacci(n): + """Recursive fibonacci - should show up prominently in profiler.""" + if n <= 1: + return n + return slow_fibonacci(n-1) + slow_fibonacci(n-2) + +def cpu_intensive_work(): + """CPU intensive work that should show in profiler.""" + result = 0 + for i in range(10000): + result += i * i + if i % 100 == 0: + result = result % 1000000 + return result + +def medium_computation(): + """Medium complexity function.""" + result = 0 + for i in range(100): + result += i * i + return result + +def fast_loop(): + """Fast simple loop.""" + total = 0 + for i in range(50): + total += i + return total + +def nested_calls(): + """Test nested function calls.""" + def level1(): + def level2(): + return medium_computation() + return level2() + return level1() + +def main_loop(): + """Main test loop with different execution paths.""" + iteration = 0 + + while True: + iteration += 1 + + # Different execution paths - focus on CPU intensive work + if iteration % 3 == 0: + # Very CPU intensive + result = cpu_intensive_work() + elif iteration % 5 == 0: + # Expensive recursive operation + result = slow_fibonacci(12) + else: + # Medium operation + result = nested_calls() + + # No sleep - keep CPU busy + +if __name__ == "__main__": + main_loop() +''' + + def test_sampling_basic_functionality(self): + with ( + test_subprocess(self.test_script) as proc, + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profile.sample.sample( + proc.pid, + duration_sec=2, + sample_interval_usec=1000, # 1ms + show_summary=False, + ) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # Basic checks on output + self.assertIn("Captured", output) + self.assertIn("samples", output) + self.assertIn("Profile Stats", output) + + # Should see some of our test functions + self.assertIn("slow_fibonacci", output) + + def test_sampling_with_pstats_export(self): + pstats_out = tempfile.NamedTemporaryFile( + suffix=".pstats", delete=False + ) + self.addCleanup(close_and_unlink, pstats_out) + + with test_subprocess(self.test_script) as proc: + # Suppress profiler output when testing file export + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profile.sample.sample( + proc.pid, + duration_sec=1, + filename=pstats_out.name, + sample_interval_usec=10000, + ) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + # Verify file was created and contains valid data + self.assertTrue(os.path.exists(pstats_out.name)) + self.assertGreater(os.path.getsize(pstats_out.name), 0) + + # Try to load the stats file + with open(pstats_out.name, "rb") as f: + stats_data = marshal.load(f) + + # Should be a dictionary with the sampled marker + self.assertIsInstance(stats_data, dict) + self.assertIn(("__sampled__",), stats_data) + self.assertTrue(stats_data[("__sampled__",)]) + + # Should have some function data + function_entries = [ + k for k in stats_data.keys() if k != ("__sampled__",) + ] + self.assertGreater(len(function_entries), 0) + + def test_sampling_with_collapsed_export(self): + collapsed_file = tempfile.NamedTemporaryFile( + suffix=".txt", delete=False + ) + self.addCleanup(close_and_unlink, collapsed_file) + + with ( + test_subprocess(self.test_script) as proc, + ): + # Suppress profiler output when testing file export + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profile.sample.sample( + proc.pid, + duration_sec=1, + filename=collapsed_file.name, + output_format="collapsed", + sample_interval_usec=10000, + ) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + # Verify file was created and contains valid data + self.assertTrue(os.path.exists(collapsed_file.name)) + self.assertGreater(os.path.getsize(collapsed_file.name), 0) + + # Check file format + with open(collapsed_file.name, "r") as f: + content = f.read() + + lines = content.strip().split("\n") + self.assertGreater(len(lines), 0) + + # Each line should have format: stack_trace count + for line in lines: + parts = line.rsplit(" ", 1) + self.assertEqual(len(parts), 2) + + stack_trace, count_str = parts + self.assertGreater(len(stack_trace), 0) + self.assertTrue(count_str.isdigit()) + self.assertGreater(int(count_str), 0) + + # Stack trace should contain semicolon-separated entries + if ";" in stack_trace: + stack_parts = stack_trace.split(";") + for part in stack_parts: + # Each part should be file:function:line + self.assertIn(":", part) + + def test_sampling_all_threads(self): + with ( + test_subprocess(self.test_script) as proc, + # Suppress profiler output + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profile.sample.sample( + proc.pid, + duration_sec=1, + all_threads=True, + sample_interval_usec=10000, + show_summary=False, + ) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + # Just verify that sampling completed without error + # We're not testing output format here + + +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestSampleProfilerErrorHandling(unittest.TestCase): + def test_invalid_pid(self): + with self.assertRaises((OSError, RuntimeError)): + profile.sample.sample(-1, duration_sec=1) + + def test_process_dies_during_sampling(self): + with test_subprocess("import time; time.sleep(0.5); exit()") as proc: + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profile.sample.sample( + proc.pid, + duration_sec=2, # Longer than process lifetime + sample_interval_usec=50000, + ) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + output = captured_output.getvalue() + + self.assertIn("Error rate", output) + + def test_invalid_output_format(self): + with self.assertRaises(ValueError): + profile.sample.sample( + os.getpid(), + duration_sec=1, + output_format="invalid_format", + ) + + def test_invalid_output_format_with_mocked_profiler(self): + """Test invalid output format with proper mocking to avoid permission issues.""" + with mock.patch( + "profile.sample.SampleProfiler" + ) as mock_profiler_class: + mock_profiler = mock.MagicMock() + mock_profiler_class.return_value = mock_profiler + + with self.assertRaises(ValueError) as cm: + profile.sample.sample( + 12345, + duration_sec=1, + output_format="unknown_format", + ) + + # Should raise ValueError with the invalid format name + self.assertIn( + "Invalid output format: unknown_format", str(cm.exception) + ) + + def test_is_process_running(self): + with test_subprocess("import time; time.sleep(1000)") as proc: + try: + profiler = SampleProfiler(pid=proc.pid, sample_interval_usec=1000, all_threads=False) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + self.assertTrue(profiler._is_process_running()) + self.assertIsNotNone(profiler.unwinder.get_stack_trace()) + proc.kill() + proc.wait() + # ValueError on MacOS (yeah I know), ProcessLookupError on Linux and Windows + self.assertRaises((ValueError, ProcessLookupError), profiler.unwinder.get_stack_trace) + + # Exit the context manager to ensure the process is terminated + self.assertFalse(profiler._is_process_running()) + self.assertRaises((ValueError, ProcessLookupError), profiler.unwinder.get_stack_trace) + + @unittest.skipUnless(sys.platform == "linux", "Only valid on Linux") + def test_esrch_signal_handling(self): + with test_subprocess("import time; time.sleep(1000)") as proc: + try: + unwinder = _remote_debugging.RemoteUnwinder(proc.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + initial_trace = unwinder.get_stack_trace() + self.assertIsNotNone(initial_trace) + + proc.kill() + + # Wait for the process to die and try to get another trace + proc.wait() + + with self.assertRaises(ProcessLookupError): + unwinder.get_stack_trace() + + + +class TestSampleProfilerCLI(unittest.TestCase): + def test_cli_collapsed_format_validation(self): + """Test that CLI properly validates incompatible options with collapsed format.""" + test_cases = [ + # Test sort options are invalid with collapsed + ( + ["profile.sample", "--collapsed", "--sort-nsamples", "12345"], + "sort", + ), + ( + ["profile.sample", "--collapsed", "--sort-tottime", "12345"], + "sort", + ), + ( + [ + "profile.sample", + "--collapsed", + "--sort-cumtime", + "12345", + ], + "sort", + ), + ( + [ + "profile.sample", + "--collapsed", + "--sort-sample-pct", + "12345", + ], + "sort", + ), + ( + [ + "profile.sample", + "--collapsed", + "--sort-cumul-pct", + "12345", + ], + "sort", + ), + ( + ["profile.sample", "--collapsed", "--sort-name", "12345"], + "sort", + ), + # Test limit option is invalid with collapsed + (["profile.sample", "--collapsed", "-l", "20", "12345"], "limit"), + ( + ["profile.sample", "--collapsed", "--limit", "20", "12345"], + "limit", + ), + # Test no-summary option is invalid with collapsed + ( + ["profile.sample", "--collapsed", "--no-summary", "12345"], + "summary", + ), + ] + + for test_args, expected_error_keyword in test_cases: + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + profile.sample.main() + + self.assertEqual(cm.exception.code, 2) # argparse error code + error_msg = mock_stderr.getvalue() + self.assertIn("error:", error_msg) + self.assertIn("--pstats format", error_msg) + + def test_cli_default_collapsed_filename(self): + """Test that collapsed format gets a default filename when not specified.""" + test_args = ["profile.sample", "--collapsed", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profile.sample.sample") as mock_sample, + ): + profile.sample.main() + + # Check that filename was set to default collapsed format + mock_sample.assert_called_once() + call_args = mock_sample.call_args[1] + self.assertEqual(call_args["output_format"], "collapsed") + self.assertEqual(call_args["filename"], "collapsed.12345.txt") + + def test_cli_custom_output_filenames(self): + """Test custom output filenames for both formats.""" + test_cases = [ + ( + ["profile.sample", "--pstats", "-o", "custom.pstats", "12345"], + "custom.pstats", + "pstats", + ), + ( + ["profile.sample", "--collapsed", "-o", "custom.txt", "12345"], + "custom.txt", + "collapsed", + ), + ] + + for test_args, expected_filename, expected_format in test_cases: + with ( + mock.patch("sys.argv", test_args), + mock.patch("profile.sample.sample") as mock_sample, + ): + profile.sample.main() + + mock_sample.assert_called_once() + call_args = mock_sample.call_args[1] + self.assertEqual(call_args["filename"], expected_filename) + self.assertEqual(call_args["output_format"], expected_format) + + def test_cli_missing_required_arguments(self): + """Test that CLI requires PID argument.""" + with ( + mock.patch("sys.argv", ["profile.sample"]), + mock.patch("sys.stderr", io.StringIO()), + ): + with self.assertRaises(SystemExit): + profile.sample.main() + + def test_cli_mutually_exclusive_format_options(self): + """Test that pstats and collapsed options are mutually exclusive.""" + with ( + mock.patch( + "sys.argv", + ["profile.sample", "--pstats", "--collapsed", "12345"], + ), + mock.patch("sys.stderr", io.StringIO()), + ): + with self.assertRaises(SystemExit): + profile.sample.main() + + def test_argument_parsing_basic(self): + test_args = ["profile.sample", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profile.sample.sample") as mock_sample, + ): + profile.sample.main() + + mock_sample.assert_called_once_with( + 12345, + sample_interval_usec=100, + duration_sec=10, + filename=None, + all_threads=False, + limit=15, + sort=2, + show_summary=True, + output_format="pstats", + realtime_stats=False, + ) + + def test_sort_options(self): + sort_options = [ + ("--sort-nsamples", 0), + ("--sort-tottime", 1), + ("--sort-cumtime", 2), + ("--sort-sample-pct", 3), + ("--sort-cumul-pct", 4), + ("--sort-name", -1), + ] + + for option, expected_sort_value in sort_options: + test_args = ["profile.sample", option, "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profile.sample.sample") as mock_sample, + ): + profile.sample.main() + + mock_sample.assert_called_once() + call_args = mock_sample.call_args[1] + self.assertEqual( + call_args["sort"], + expected_sort_value, + ) + mock_sample.reset_mock() + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 66b34b779f27cb..7fea799c3912dd 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2530,6 +2530,7 @@ LIBSUBDIRS= asyncio \ logging \ multiprocessing multiprocessing/dummy \ pathlib \ + profile \ pydoc_data \ re \ site-packages \ diff --git a/Misc/NEWS.d/next/Library/2025-07-06-18-38-10.gh-issue-135953.Z29DCz.rst b/Misc/NEWS.d/next/Library/2025-07-06-18-38-10.gh-issue-135953.Z29DCz.rst new file mode 100644 index 00000000000000..7e32388ed791cb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-06-18-38-10.gh-issue-135953.Z29DCz.rst @@ -0,0 +1,9 @@ +Implement a new high-frequency runtime profiler that leverages the existing +remote debugging functionality to collect detailed execution statistics +from running Python processes. This tool is exposed in the +``profile.sample`` module and enables non-intrusive observation of +production applications by attaching to already-running processes without +requiring any code modifications, restarts, or special startup flags. The +observer can perform extremely high-frequency sampling of stack traces and +interpreter state, providing detailed runtime execution analysis of live +applications. diff --git a/Python/remote_debug.h b/Python/remote_debug.h index 8f9b6cd4c4960f..5324a7aaa6f5e5 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -751,6 +751,14 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c #ifdef MS_WINDOWS +static int is_process_alive(HANDLE hProcess) { + DWORD exitCode; + if (GetExitCodeProcess(hProcess, &exitCode)) { + return exitCode == STILL_ACTIVE; + } + return 0; +} + static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { @@ -911,7 +919,9 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) _PyErr_ChainExceptions1(exc); } #else - Py_UNREACHABLE(); + _set_debug_exception_cause(PyExc_RuntimeError, + "Reading the PyRuntime section is not supported on this platform"); + return 0; #endif return address; @@ -981,6 +991,13 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address SIZE_T result = 0; do { if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { + // Check if the process is still alive: we need to be able to tell our caller + // that the process is dead and not just that the read failed. + if (!is_process_alive(handle->hProcess)) { + _set_errno(ESRCH); + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } PyErr_SetFromWindowsErr(0); DWORD error = GetLastError(); _set_debug_exception_cause(PyExc_OSError, @@ -1013,6 +1030,9 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address return read_remote_memory_fallback(handle, remote_address, len, dst); } PyErr_SetFromErrno(PyExc_OSError); + if (errno == ESRCH) { + return -1; + } _set_debug_exception_cause(PyExc_OSError, "process_vm_readv failed for PID %d at address 0x%lx " "(size %zu, partial read %zd bytes): %s", From c560df9658f1a24edea995fe6f9c84c55b37cfb3 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 10 Jul 2025 14:13:23 -0700 Subject: [PATCH 846/989] gh-136517: Print uncollectable objects if DEBUG_UNCOLLECTABLE mode was set (#136518) --- Lib/test/test_gc.py | 5 +++++ .../2025-07-10-23-23-50.gh-issue-136517._NHJyv.rst | 2 ++ Python/gc.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-23-23-50.gh-issue-136517._NHJyv.rst diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 85c43055d0dcec..96ebb23f73de9d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -732,6 +732,9 @@ def run_command(code): self.assertIn(b"ResourceWarning: gc: 2 uncollectable objects at " b"shutdown; use", stderr) self.assertNotIn(b"", stderr) + one_line_re = b"gc: uncollectable " + expected_re = one_line_re + b"\r?\n" + one_line_re + self.assertNotRegex(stderr, expected_re) # With DEBUG_UNCOLLECTABLE, the garbage list gets printed stderr = run_command(code % "gc.DEBUG_UNCOLLECTABLE") self.assertIn(b"ResourceWarning: gc: 2 uncollectable objects at " @@ -739,6 +742,8 @@ def run_command(code): self.assertTrue( (b"[, ]" in stderr) or (b"[, ]" in stderr), stderr) + # we expect two lines with uncollectable objects + self.assertRegex(stderr, expected_re) # With DEBUG_SAVEALL, no additional message should get printed # (because gc.garbage also contains normally reclaimable cyclic # references, and its elements get printed at runtime anyway). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-23-23-50.gh-issue-136517._NHJyv.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-23-23-50.gh-issue-136517._NHJyv.rst new file mode 100644 index 00000000000000..bf26c4eb0e4c74 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-23-23-50.gh-issue-136517._NHJyv.rst @@ -0,0 +1,2 @@ +Fixed a typo that prevented printing of uncollectable objects when the +:const:`gc.DEBUG_UNCOLLECTABLE` mode was set. diff --git a/Python/gc.c b/Python/gc.c index 88849a43680d2e..4160f68c27a3ef 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1782,7 +1782,7 @@ gc_collect_region(PyThreadState *tstate, Py_ssize_t n = 0; for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; - if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } stats->uncollectable = n; From 56c6f04b8862c20ff6eddc4400f170ad91e55f66 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 10 Jul 2025 15:08:48 -0700 Subject: [PATCH 847/989] Omit `Python/perf_jit_trampoline.c` from the `**/*jit*` CODEOWNERS rule (#136519) Omit perf_jit_trampoline from "JIT" codeowners --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 98efdb65146374..02a7b5d45b4627 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -27,6 +27,7 @@ Modules/Setup* @erlend-aasland **/*genobject* @markshannon **/*hamt* @1st1 **/*jit* @brandtbucher @savannahostrowski @diegorusso +Python/perf_jit_trampoline.c # Exclude the owners of "**/*jit*", above. Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra From 49365bd110a254a6a304d2f880482bfeb2e4490d Mon Sep 17 00:00:00 2001 From: Ethan Furman Date: Thu, 10 Jul 2025 16:49:09 -0700 Subject: [PATCH 848/989] gh-107538: [Enum] fix handling of inverted/negative values (GH-132273) * Fix flag mask inversion when unnamed flags exist. For example: class Flag(enum.Flag): A = 0x01 B = 0x02 MASK = 0xff ~Flag.MASK is Flag(0) * EJECT and KEEP flags (IntEnum is KEEP) use direct value. * correct Flag inversion to only flip flag bits IntFlag will flip all bits -- this only makes a difference in flag sets with missing values. * correct negative assigned values in flags negative values are no longer used as-is, but become inverted; i.e. class Y(self.enum_type): A = auto() B = auto() C = ~A # aka ~1 aka 0b1 110 (from enum.bin()) aka 6 D = auto() assert Y.C. is Y.B|Y.D --- Lib/enum.py | 30 ++++++++++++++- Lib/test/test_enum.py | 37 +++++++++++++++++++ ...-07-05-14-34-10.gh-issue-105497.HU5u89.rst | 1 + ...-04-08-07-25-10.gh-issue-107583.JGfbhq.rst | 4 ++ 4 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-07-05-14-34-10.gh-issue-105497.HU5u89.rst create mode 100644 Misc/NEWS.d/next/Library/2025-04-08-07-25-10.gh-issue-107583.JGfbhq.rst diff --git a/Lib/enum.py b/Lib/enum.py index 01fecca3e5aac0..538b9cc8e96fc1 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -535,7 +535,7 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k # now set the __repr__ for the value classdict['_value_repr_'] = metacls._find_data_repr_(cls, bases) # - # Flag structures (will be removed if final class is not a Flag + # Flag structures (will be removed if final class is not a Flag) classdict['_boundary_'] = ( boundary or getattr(first_enum, '_boundary_', None) @@ -544,6 +544,29 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_singles_mask_'] = 0 classdict['_all_bits_'] = 0 classdict['_inverted_'] = None + # check for negative flag values and invert if found (using _proto_members) + if Flag is not None and bases and issubclass(bases[-1], Flag): + bits = 0 + inverted = [] + for n in member_names: + p = classdict[n] + if isinstance(p.value, int): + if p.value < 0: + inverted.append(p) + else: + bits |= p.value + elif p.value is None: + pass + elif isinstance(p.value, tuple) and p.value and isinstance(p.value[0], int): + if p.value[0] < 0: + inverted.append(p) + else: + bits |= p.value[0] + for p in inverted: + if isinstance(p.value, int): + p.value = bits & p.value + else: + p.value = (bits & p.value[0], ) + p.value[1:] try: classdict['_%s__in_progress' % cls] = True enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) @@ -1487,7 +1510,10 @@ def _missing_(cls, value): ) if value < 0: neg_value = value - value = all_bits + 1 + value + if cls._boundary_ in (EJECT, KEEP): + value = all_bits + 1 + value + else: + value = singles_mask & value # get members and unknown unknown = value & ~flag_mask aliases = value & ~singles_mask diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index bbc7630fa83f45..2dd585f246d506 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -1002,12 +1002,18 @@ class OpenAB(self.enum_type): self.assertIs(~(A|B), OpenAB(252)) self.assertIs(~AB_MASK, OpenAB(0)) self.assertIs(~OpenAB(0), AB_MASK) + self.assertIs(OpenAB(~4), OpenAB(251)) else: self.assertIs(~A, B) self.assertIs(~B, A) + self.assertIs(OpenAB(~1), B) + self.assertIs(OpenAB(~2), A) self.assertIs(~(A|B), OpenAB(0)) self.assertIs(~AB_MASK, OpenAB(0)) self.assertIs(~OpenAB(0), (A|B)) + self.assertIs(OpenAB(~3), OpenAB(0)) + self.assertIs(OpenAB(~4), OpenAB(3)) + self.assertIs(OpenAB(~33), B) # class OpenXYZ(self.enum_type): X = 4 @@ -1031,6 +1037,9 @@ class OpenXYZ(self.enum_type): self.assertIs(~X, Y|Z) self.assertIs(~Y, X|Z) self.assertIs(~Z, X|Y) + self.assertIs(OpenXYZ(~4), Y|Z) + self.assertIs(OpenXYZ(~2), X|Z) + self.assertIs(OpenXYZ(~1), X|Y) self.assertIs(~(X|Y), Z) self.assertIs(~(X|Z), Y) self.assertIs(~(Y|Z), X) @@ -1038,6 +1047,28 @@ class OpenXYZ(self.enum_type): self.assertIs(~XYZ_MASK, OpenXYZ(0)) self.assertTrue(~OpenXYZ(0), (X|Y|Z)) + def test_assigned_negative_value(self): + class X(self.enum_type): + A = auto() + B = auto() + C = A | B + D = ~A + self.assertEqual(list(X), [X.A, X.B]) + self.assertIs(~X.A, X.B) + self.assertIs(X.D, X.B) + self.assertEqual(X.D.value, 2) + # + class Y(self.enum_type): + A = auto() + B = auto() + C = A | B + D = ~A + E = auto() + self.assertEqual(list(Y), [Y.A, Y.B, Y.E]) + self.assertIs(~Y.A, Y.B|Y.E) + self.assertIs(Y.D, Y.B|Y.E) + self.assertEqual(Y.D.value, 6) + class TestPlainEnumClass(_EnumTests, _PlainOutputTests, unittest.TestCase): enum_type = Enum @@ -3680,6 +3711,8 @@ class SkipFlag(enum.Flag): C = 4 | B # self.assertTrue(SkipFlag.C in (SkipFlag.A|SkipFlag.C)) + self.assertTrue(SkipFlag.B in SkipFlag.C) + self.assertIs(SkipFlag(~1), SkipFlag.B) self.assertRaisesRegex(ValueError, 'SkipFlag.. invalid value 42', SkipFlag, 42) # class SkipIntFlag(enum.IntFlag): @@ -3688,6 +3721,8 @@ class SkipIntFlag(enum.IntFlag): C = 4 | B # self.assertTrue(SkipIntFlag.C in (SkipIntFlag.A|SkipIntFlag.C)) + self.assertTrue(SkipIntFlag.B in SkipIntFlag.C) + self.assertIs(SkipIntFlag(~1), SkipIntFlag.B|SkipIntFlag.C) self.assertEqual(SkipIntFlag(42).value, 42) # class MethodHint(Flag): @@ -4727,6 +4762,8 @@ class Color(Flag): BLUE = 4 WHITE = -1 # no error means success + self.assertEqual(list(Color.WHITE), [Color.RED, Color.GREEN, Color.BLUE]) + self.assertEqual(Color.WHITE.value, 7) class TestInternals(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2023-07-05-14-34-10.gh-issue-105497.HU5u89.rst b/Misc/NEWS.d/next/Library/2023-07-05-14-34-10.gh-issue-105497.HU5u89.rst new file mode 100644 index 00000000000000..f4f2db08f73f50 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-05-14-34-10.gh-issue-105497.HU5u89.rst @@ -0,0 +1 @@ +Fix flag mask inversion when unnamed flags exist. diff --git a/Misc/NEWS.d/next/Library/2025-04-08-07-25-10.gh-issue-107583.JGfbhq.rst b/Misc/NEWS.d/next/Library/2025-04-08-07-25-10.gh-issue-107583.JGfbhq.rst new file mode 100644 index 00000000000000..4235612627341b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-08-07-25-10.gh-issue-107583.JGfbhq.rst @@ -0,0 +1,4 @@ +Fix :class:`!Flag` inversion when flag set has missing values +(:class:`!IntFlag` still flips all bits); fix negative assigned values +during flag creation (both :class:`!Flag` and :class:`!IntFlag` ignore +missing values). From 515b3d18edfce00ce441e0e41091d0dd57f201ad Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Fri, 11 Jul 2025 17:37:01 +0800 Subject: [PATCH 849/989] gh-101100: Fix sphinx warnings in `library/email.parser.rst` (#136475) --- Doc/library/email.parser.rst | 10 +++++----- Doc/tools/.nitignore | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst index 439b5c8f34b65a..90796370ebb407 100644 --- a/Doc/library/email.parser.rst +++ b/Doc/library/email.parser.rst @@ -48,8 +48,8 @@ methods. FeedParser API ^^^^^^^^^^^^^^ -The :class:`BytesFeedParser`, imported from the :mod:`email.feedparser` module, -provides an API that is conducive to incremental parsing of email messages, +The :class:`BytesFeedParser`, imported from the :mod:`email.parser.FeedParser` +module, provides an API that is conducive to incremental parsing of email messages, such as would be necessary when reading the text of an email message from a source that can block (such as a socket). The :class:`BytesFeedParser` can of course be used to parse an email message fully contained in a :term:`bytes-like @@ -116,7 +116,7 @@ Here is the API for the :class:`BytesFeedParser`: Works like :class:`BytesFeedParser` except that the input to the :meth:`~BytesFeedParser.feed` method must be a string. This is of limited utility, since the only way for such a message to be valid is for it to - contain only ASCII text or, if :attr:`~email.policy.Policy.utf8` is + contain only ASCII text or, if :attr:`~email.policy.EmailPolicy.utf8` is ``True``, no binary attachments. .. versionchanged:: 3.3 Added the *policy* keyword. @@ -155,11 +155,11 @@ message body, instead setting the payload to the raw body. Read all the data from the binary file-like object *fp*, parse the resulting bytes, and return the message object. *fp* must support - both the :meth:`~io.IOBase.readline` and the :meth:`~io.IOBase.read` + both the :meth:`~io.IOBase.readline` and the :meth:`~io.TextIOBase.read` methods. The bytes contained in *fp* must be formatted as a block of :rfc:`5322` - (or, if :attr:`~email.policy.Policy.utf8` is ``True``, :rfc:`6532`) + (or, if :attr:`~email.policy.EmailPolicy.utf8` is ``True``, :rfc:`6532`) style headers and header continuation lines, optionally preceded by an envelope header. The header block is terminated either by the end of the data or by a blank line. Following the header block is the body of the diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 4f5396857f3024..897b404b8d6bb3 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -15,7 +15,6 @@ Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/email.charset.rst -Doc/library/email.parser.rst Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst From 975b57d945c84000949f241ded8f44413ecc6217 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 11 Jul 2025 12:50:21 +0200 Subject: [PATCH 850/989] gh-76637: Note that `undefined` Codec is for testing (#136531) Closes #76637 --- Doc/library/codecs.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 37bd913b765d21..c5dae7c8e8fd04 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1395,7 +1395,11 @@ encodings. | | | It is used in the Python | | | | pickle protocol. | +--------------------+---------+---------------------------+ -| undefined | | Raise an exception for | +| undefined | | This Codec should only | +| | | be used for testing | +| | | purposes. | +| | | | +| | | Raise an exception for | | | | all conversions, even | | | | empty strings. The error | | | | handler is ignored. | From 3343fce05acb29a772599ce586abd43edf40bae6 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 11 Jul 2025 15:31:59 +0300 Subject: [PATCH 851/989] gh-136434: Fix docs generation of `UnboundItem` in subinterpreters (#136435) --- Lib/concurrent/interpreters/_crossinterp.py | 19 ++++++++++++------- ...-07-08-20-58-01.gh-issue-136434.uuJsjS.rst | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-08-20-58-01.gh-issue-136434.uuJsjS.rst diff --git a/Lib/concurrent/interpreters/_crossinterp.py b/Lib/concurrent/interpreters/_crossinterp.py index f47eb693ac861c..a5f46b20fbb4c5 100644 --- a/Lib/concurrent/interpreters/_crossinterp.py +++ b/Lib/concurrent/interpreters/_crossinterp.py @@ -40,16 +40,21 @@ class UnboundItem: @classonly def singleton(cls, kind, module, name='UNBOUND'): - doc = cls.__doc__.replace('cross-interpreter container', kind) - doc = doc.replace('cross-interpreter', kind) + doc = cls.__doc__ + if doc: + doc = doc.replace( + 'cross-interpreter container', kind, + ).replace( + 'cross-interpreter', kind, + ) subclass = type( f'Unbound{kind.capitalize()}Item', (cls,), - dict( - _MODULE=module, - _NAME=name, - __doc__=doc, - ), + { + "_MODULE": module, + "_NAME": name, + "__doc__": doc, + }, ) return object.__new__(subclass) diff --git a/Misc/NEWS.d/next/Library/2025-07-08-20-58-01.gh-issue-136434.uuJsjS.rst b/Misc/NEWS.d/next/Library/2025-07-08-20-58-01.gh-issue-136434.uuJsjS.rst new file mode 100644 index 00000000000000..951f57100b650c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-08-20-58-01.gh-issue-136434.uuJsjS.rst @@ -0,0 +1,2 @@ +Fix docs generation of ``UnboundItem`` in :mod:`concurrent.interpreters` +when running with :option:`-OO`. From 7de8ea7be6c19f21c090f44a01817fab26c1f095 Mon Sep 17 00:00:00 2001 From: William S Fulton Date: Fri, 11 Jul 2025 14:18:35 +0100 Subject: [PATCH 852/989] gh-136300: Modify C tests to conform to PEP-737 (GH-136301) - Use %T format specifier instead of %s and Py_TYPE(x)->tp_name. - Remove legacy %.200s format specifier for truncating type names. Co-authored-by: Victor Stinner --- Modules/_testbuffer.c | 3 +-- Modules/_testcapi/monitoring.c | 2 +- Modules/_testcapi/time.c | 3 +-- Modules/_testcapimodule.c | 3 +-- Modules/_testinternalcapi.c | 3 +-- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Modules/_testbuffer.c b/Modules/_testbuffer.c index 7fc4d61db29469..d2e61e9d6acf24 100644 --- a/Modules/_testbuffer.c +++ b/Modules/_testbuffer.c @@ -1855,8 +1855,7 @@ ndarray_subscript(PyObject *op, PyObject *key) type_error: PyErr_Format(PyExc_TypeError, - "cannot index memory using \"%.200s\"", - Py_TYPE(key)->tp_name); + "cannot index memory using \"%T\"", key); err_occurred: Py_DECREF(nd); return NULL; diff --git a/Modules/_testcapi/monitoring.c b/Modules/_testcapi/monitoring.c index 08a2055c51bc0d..e041943492d61f 100644 --- a/Modules/_testcapi/monitoring.c +++ b/Modules/_testcapi/monitoring.c @@ -109,7 +109,7 @@ static PyTypeObject PyCodeLike_Type = { }; #define RAISE_UNLESS_CODELIKE(v) if (!Py_IS_TYPE((v), &PyCodeLike_Type)) { \ - PyErr_Format(PyExc_TypeError, "expected a code-like, got %s", Py_TYPE(v)->tp_name); \ + PyErr_Format(PyExc_TypeError, "expected a code-like, got %T", v); \ return NULL; \ } diff --git a/Modules/_testcapi/time.c b/Modules/_testcapi/time.c index 464cf5c3125012..4ca6ff587b95cc 100644 --- a/Modules/_testcapi/time.c +++ b/Modules/_testcapi/time.c @@ -5,8 +5,7 @@ static int pytime_from_nanoseconds(PyTime_t *tp, PyObject *obj) { if (!PyLong_Check(obj)) { - PyErr_Format(PyExc_TypeError, "expect int, got %s", - Py_TYPE(obj)->tp_name); + PyErr_Format(PyExc_TypeError, "expect int, got %T", obj); return -1; } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 71fffedee146fa..334f2a53041ca4 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -515,8 +515,7 @@ test_thread_state(PyObject *self, PyObject *args) return NULL; if (!PyCallable_Check(fn)) { - PyErr_Format(PyExc_TypeError, "'%s' object is not callable", - Py_TYPE(fn)->tp_name); + PyErr_Format(PyExc_TypeError, "'%T' object is not callable", fn); return NULL; } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 8027f0015c7409..533e7dd3a7ec00 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2207,8 +2207,7 @@ get_code(PyObject *obj) return (PyCodeObject *)PyFunction_GetCode(obj); } return (PyCodeObject *)PyErr_Format( - PyExc_TypeError, "expected function or code object, got %s", - Py_TYPE(obj)->tp_name); + PyExc_TypeError, "expected function or code object, got %T", obj); } static PyObject * From 236f733d8ffb3d587e1167fa0a0248c24512e7fd Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 11 Jul 2025 14:32:35 +0100 Subject: [PATCH 853/989] gh-136541: Fix several problems of perf trampolines in x86_64 and aarch64 (#136500) This commit fixes the following problems: * The x86_64 trampolines are not preserving frame pointers * The hardcoded offsets to the code segment from the FDE only worked properly for x64_64 * The CIE data was not following conventions of aarch64 * The eh_frame for aarch64 was not fully correct --- Include/internal/pycore_interp_structs.h | 1 + ...-07-11-13-45-48.gh-issue-136541.uZ_-Ju.rst | 3 + Python/asm_trampoline.S | 7 +- Python/perf_jit_trampoline.c | 163 ++++++++++++++---- Python/perf_trampoline.c | 15 +- 5 files changed, 148 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-13-45-48.gh-issue-136541.uZ_-Ju.rst diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f1f427d99dea69..8c3946cd462ce6 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -73,6 +73,7 @@ struct trampoline_api_st { int (*free_state)(void* state); void *state; Py_ssize_t code_padding; + Py_ssize_t code_alignment; }; #endif diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-13-45-48.gh-issue-136541.uZ_-Ju.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-13-45-48.gh-issue-136541.uZ_-Ju.rst new file mode 100644 index 00000000000000..af9b94ad0613d7 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-13-45-48.gh-issue-136541.uZ_-Ju.rst @@ -0,0 +1,3 @@ +Fix some issues with the perf trampolines on x86-64 and aarch64. The +trampolines were not being generated correctly for some cases, which could +lead to the perf integration not working correctly. Patch by Pablo Galindo. diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 616752459ba4d9..a14e68c0e81932 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -12,9 +12,10 @@ _Py_trampoline_func_start: #if defined(__CET__) && (__CET__ & 1) endbr64 #endif - sub $8, %rsp - call *%rcx - add $8, %rsp + push %rbp + mov %rsp, %rbp + call *%rcx + pop %rbp ret #endif // __x86_64__ #if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 2ca18c23593547..469882d9b2f025 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -97,10 +97,9 @@ * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding] * - * The padding size (0x100) is chosen to accommodate typical unwind info sizes - * while maintaining 16-byte alignment requirements. + * The padding size is now calculated automatically during initialization + * based on the actual unwind information requirements. */ -#define PERF_JIT_CODE_PADDING 0x100 /* Convenient access to the global trampoline API state */ #define trampoline_api _PyRuntime.ceval.perf.trampoline_api @@ -401,10 +400,12 @@ enum { DWRF_CFA_nop = 0x0, // No operation DWRF_CFA_offset_extended = 0x5, // Extended offset instruction DWRF_CFA_def_cfa = 0xc, // Define CFA rule + DWRF_CFA_def_cfa_register = 0xd, // Define CFA register DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset DWRF_CFA_advance_loc = 0x40, // Advance location counter - DWRF_CFA_offset = 0x80 // Simple offset instruction + DWRF_CFA_offset = 0x80, // Simple offset instruction + DWRF_CFA_restore = 0xc0 // Restore register }; /* DWARF Exception Handling pointer encodings */ @@ -519,6 +520,7 @@ typedef struct ELFObjectContext { uint8_t* p; // Current write position in buffer uint8_t* startp; // Start of buffer (for offset calculations) uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets) + uint8_t* fde_p; // Start of FDE data (for PC-relative calculations) uint32_t code_size; // Size of the code being described } ELFObjectContext; @@ -643,6 +645,8 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { // DWARF EH FRAME GENERATION // ============================================================================= +static void elf_init_ehframe(ELFObjectContext* ctx); + /* * Initialize DWARF .eh_frame section for a code region * @@ -657,6 +661,23 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { * Args: * ctx: ELF object context containing code size and buffer pointers */ +static size_t calculate_eh_frame_size(void) { + /* Calculate the EH frame size for the trampoline function */ + extern void *_Py_trampoline_func_start; + extern void *_Py_trampoline_func_end; + + size_t code_size = (char*)&_Py_trampoline_func_end - (char*)&_Py_trampoline_func_start; + + ELFObjectContext ctx; + char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) + ctx.code_size = code_size; + ctx.startp = ctx.p = (uint8_t*)buffer; + ctx.fde_p = NULL; + + elf_init_ehframe(&ctx); + return ctx.p - ctx.startp; +} + static void elf_init_ehframe(ELFObjectContext* ctx) { uint8_t* p = ctx->p; uint8_t* framep = p; // Remember start of frame data @@ -784,7 +805,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { * * DWRF_SECTION(FDE, * DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here) - * DWRF_U32(-0x30); // Initial PC-relative location of the code + * DWRF_U32(pc_relative_offset); // PC-relative location of the code (calculated dynamically) * DWRF_U32(ctx->code_size); // Code range covered by this FDE * DWRF_U8(0); // Augmentation data length (none) * @@ -830,19 +851,31 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { DWRF_U32(0); // CIE ID (0 indicates this is a CIE) DWRF_U8(DWRF_CIE_VERSION); // CIE version (1) DWRF_STR("zR"); // Augmentation string ("zR" = has LSDA) - DWRF_UV(1); // Code alignment factor +#ifdef __x86_64__ + DWRF_UV(1); // Code alignment factor (x86_64: 1 byte) +#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) + DWRF_UV(4); // Code alignment factor (AArch64: 4 bytes per instruction) +#endif DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative) DWRF_U8(DWRF_REG_RA); // Return address register number DWRF_UV(1); // Augmentation data length DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // FDE pointer encoding /* Initial CFI instructions - describe default calling convention */ +#ifdef __x86_64__ + /* x86_64 initial CFI state */ DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) DWRF_UV(DWRF_REG_SP); // CFA = SP register DWRF_UV(sizeof(uintptr_t)); // CFA = SP + pointer_size DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved DWRF_UV(1); // At offset 1 from CFA - +#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) + /* AArch64 initial CFI state */ + DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) + DWRF_UV(DWRF_REG_SP); // CFA = SP register + DWRF_UV(0); // CFA = SP + 0 (AArch64 starts with offset 0) + // No initial register saves in AArch64 CIE +#endif DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary ) @@ -853,11 +886,15 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { * * The FDE describes unwinding information specific to this function. * It references the CIE and provides function-specific CFI instructions. + * + * The PC-relative offset is calculated after the entire EH frame is built + * to ensure accurate positioning relative to the synthesized DSO layout. */ DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) - DWRF_U32(-0x30); // Machine code offset relative to .text - DWRF_U32(ctx->code_size); // Address range covered by this FDE (code lenght) + ctx->fde_p = p; // Remember where PC offset field is located for later calculation + DWRF_U32(0); // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe) + DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length) DWRF_U8(0); // Augmentation data length (none) /* @@ -868,32 +905,36 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { * conventions and register usage patterns. */ #ifdef __x86_64__ - /* x86_64 calling convention unwinding rules */ + /* x86_64 calling convention unwinding rules with frame pointer */ # if defined(__CET__) && (__CET__ & 1) - DWRF_U8(DWRF_CFA_advance_loc | 8); // Advance location by 8 bytes when CET protection is enabled -# else - DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes + DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes) # endif - DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte) + DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16 DWRF_UV(16); // New offset: SP + 16 - DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes - DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16 + DWRF_UV(2); // Offset factor: 2 * 8 = 16 bytes + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes) + DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6 + DWRF_UV(DWRF_REG_BP); // Use base pointer register + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3 + DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8 + DWRF_UV(DWRF_REG_SP); // Use stack pointer register DWRF_UV(8); // New offset: SP + 8 #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) /* AArch64 calling convention unwinding rules */ - DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30) - DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset - DWRF_UV(16); // CFA = SP + 16 (stack pointer after push) - DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Frame pointer (x29) saved - DWRF_UV(2); // At offset 2 from CFA (2 * 8 = 16 bytes) - DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Link register (x30) saved - DWRF_UV(1); // At offset 1 from CFA (1 * 8 = 8 bytes) - DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (mov x16, x3; mov x29, sp; ldp...) - DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore frame pointer (x29) - DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore link register (x30) - DWRF_U8(DWRF_CFA_def_cfa_offset); // Final CFA adjustment - DWRF_UV(0); // CFA = SP + 0 (stack restored) - + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance by 1 instruction (4 bytes) + DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 + DWRF_UV(16); // Stack pointer moved by 16 bytes + DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // x29 (frame pointer) saved + DWRF_UV(2); // At CFA-16 (2 * 8 = 16 bytes from CFA) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // x30 (link register) saved + DWRF_UV(1); // At CFA-8 (1 * 8 = 8 bytes from CFA) + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (12 bytes) + DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this! + DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 - NO DWRF_UV() after this! + DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 0 (stack restored) + DWRF_UV(0); // Back to original stack position #else # error "Unsupported target architecture" #endif @@ -902,6 +943,58 @@ static void elf_init_ehframe(ELFObjectContext* ctx) { ) ctx->p = p; // Update context pointer to end of generated data + + /* Calculate and update the PC-relative offset in the FDE + * + * When perf processes the jitdump, it creates a synthesized DSO with this layout: + * + * Synthesized DSO Memory Layout: + * ┌─────────────────────────────────────────────────────────────┐ < code_start + * │ Code Section │ + * │ (round_up(code_size, 8) bytes) │ + * ├─────────────────────────────────────────────────────────────┤ < start of EH frame data + * │ EH Frame Data │ + * │ ┌─────────────────────────────────────────────────────┐ │ + * │ │ CIE data │ │ + * │ └─────────────────────────────────────────────────────┘ │ + * │ ┌─────────────────────────────────────────────────────┐ │ + * │ │ FDE Header: │ │ + * │ │ - CIE offset (4 bytes) │ │ + * │ │ - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start + * │ │ - address range (4 bytes) │ │ (this specific field) + * │ │ CFI Instructions... │ │ + * │ └─────────────────────────────────────────────────────┘ │ + * ├─────────────────────────────────────────────────────────────┤ < reference_point + * │ EhFrameHeader │ + * │ (navigation metadata) │ + * └─────────────────────────────────────────────────────────────┘ + * + * The PC offset field in the FDE must contain the distance from itself to code_start: + * + * distance = code_start - fde_pc_field + * + * Where: + * fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame + * code_start_location = reference_point - eh_frame_size - round_up(code_size, 8) + * + * Therefore: + * distance = code_start_location - fde_pc_field_location + * = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame) + * = -rounded_code_size - fde_offset_in_frame + * = -(round_up(code_size, 8) + fde_offset_in_frame) + * + * Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field, + * + */ + if (ctx->fde_p != NULL) { + int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp); + int32_t rounded_code_size = round_up(ctx->code_size, 8); + int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame); + + + // Update the PC-relative offset in the FDE + *(int32_t*)ctx->fde_p = pc_relative_offset; + } } // ============================================================================= @@ -1002,8 +1095,11 @@ static void* perf_map_jit_init(void) { /* Initialize code ID counter */ perf_jit_map_state.code_id = 0; - /* Configure trampoline API with padding information */ - trampoline_api.code_padding = PERF_JIT_CODE_PADDING; + /* Calculate padding size based on actual unwind info requirements */ + size_t eh_frame_size = calculate_eh_frame_size(); + size_t unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; + trampoline_api.code_padding = round_up(unwind_data_size, 16); + trampoline_api.code_alignment = 32; return &perf_jit_map_state; } @@ -1092,6 +1188,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) ctx.code_size = code_size; ctx.startp = ctx.p = (uint8_t*)buffer; + ctx.fde_p = NULL; // Initialize to NULL, will be set when FDE is written /* Generate EH frame (Exception Handling frame) data */ elf_init_ehframe(&ctx); @@ -1110,7 +1207,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; /* Verify we don't exceed our padding budget */ - assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); + assert(ev2.unwind_data_size <= (uint64_t)trampoline_api.code_padding); ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment @@ -1262,4 +1359,4 @@ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { &perf_map_jit_fini, // Cleanup function }; -#endif /* PY_HAVE_PERF_TRAMPOLINE */ \ No newline at end of file +#endif /* PY_HAVE_PERF_TRAMPOLINE */ diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 996e54b82b61e8..a2da3c7d56df50 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -230,6 +230,7 @@ perf_map_init_state(void) { PyUnstable_PerfMapState_Init(); trampoline_api.code_padding = 0; + trampoline_api.code_alignment = 32; perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP; return NULL; } @@ -291,7 +292,9 @@ new_code_arena(void) void *start = &_Py_trampoline_func_start; void *end = &_Py_trampoline_func_end; size_t code_size = end - start; - size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16); + size_t unaligned_size = code_size + trampoline_api.code_padding; + size_t chunk_size = round_up(unaligned_size, trampoline_api.code_alignment); + assert(chunk_size % trampoline_api.code_alignment == 0); // TODO: Check the effect of alignment of the code chunks. Initial investigation // showed that this has no effect on performance in x86-64 or aarch64 and the current // version has the advantage that the unwinder in GDB can unwind across JIT-ed code. @@ -356,7 +359,9 @@ static inline py_trampoline code_arena_new_code(code_arena_t *code_arena) { py_trampoline trampoline = (py_trampoline)code_arena->current_addr; - size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16); + size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, + trampoline_api.code_alignment); + assert(total_code_size % trampoline_api.code_alignment == 0); code_arena->size_left -= total_code_size; code_arena->current_addr += total_code_size; return trampoline; @@ -489,9 +494,6 @@ _PyPerfTrampoline_Init(int activate) } else { _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator); - if (new_code_arena() < 0) { - return -1; - } extra_code_index = _PyEval_RequestCodeExtraIndex(NULL); if (extra_code_index == -1) { return -1; @@ -499,6 +501,9 @@ _PyPerfTrampoline_Init(int activate) if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) { trampoline_api.state = trampoline_api.init_state(); } + if (new_code_arena() < 0) { + return -1; + } perf_status = PERF_STATUS_OK; } #endif From cbf007beeb2f69d9409b6388ee8d71007fa6c2df Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 11 Jul 2025 16:45:31 +0200 Subject: [PATCH 854/989] gh-136156: Remove tempfile test_link_tmpfile() (#136534) It's not always possible to guarantee that the file was opened with O_TMPFILE even if tempfile._O_TMPFILE_WORKS is true. --- Lib/test/test_tempfile.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index aeca62cf2564bc..52b13b98cbcce5 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1594,30 +1594,6 @@ def test_unexpected_error(self): mock_close.assert_called() self.assertEqual(os.listdir(dir), []) - @os_helper.skip_unless_hardlink - @unittest.skipUnless(tempfile._O_TMPFILE_WORKS, 'need os.O_TMPFILE') - @unittest.skipUnless(os.path.exists('/proc/self/fd'), - 'need /proc/self/fd') - def test_link_tmpfile(self): - dir = tempfile.mkdtemp() - self.addCleanup(os_helper.rmtree, dir) - filename = os.path.join(dir, "link") - - with tempfile.TemporaryFile('w', dir=dir) as tmp: - # the flag can become False on Linux <= 3.11 - if not tempfile._O_TMPFILE_WORKS: - self.skipTest("O_TMPFILE doesn't work") - - tmp.write("hello") - tmp.flush() - fd = tmp.fileno() - - os.link(f'/proc/self/fd/{fd}', - filename, - follow_symlinks=True) - with open(filename) as fp: - self.assertEqual(fp.read(), "hello") - # Helper for test_del_on_shutdown class NulledModules: From 2c9a8011c640c6f18d620dda7a150bb065d9fd0b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 11 Jul 2025 16:48:43 +0200 Subject: [PATCH 855/989] gh-135906: Test the internal C API in test_cext (#136247) Remove duplicated definition: atexit_datacallbackfunc type is already defined by Include/cpython/pylifecycle.h. --- Include/internal/pycore_interp_structs.h | 2 - Lib/test/test_cext/__init__.py | 47 ++++++++++++++---------- Lib/test/test_cext/extension.c | 20 ++++++++++ Lib/test/test_cext/setup.py | 33 ++++++++++++++--- Lib/test/test_cppext/__init__.py | 24 +++++++----- Lib/test/test_cppext/extension.cpp | 9 +++++ Lib/test/test_cppext/setup.py | 4 ++ 7 files changed, 103 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 8c3946cd462ce6..542a75617b4d3c 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -130,8 +130,6 @@ struct _atexit_runtime_state { //################### // interpreter atexit -typedef void (*atexit_datacallbackfunc)(void *); - typedef struct atexit_callback { atexit_datacallbackfunc func; void *data; diff --git a/Lib/test/test_cext/__init__.py b/Lib/test/test_cext/__init__.py index 46fde541494aa3..93e7b2043d397a 100644 --- a/Lib/test/test_cext/__init__.py +++ b/Lib/test/test_cext/__init__.py @@ -28,29 +28,13 @@ @support.requires_venv_with_pip() @support.requires_subprocess() @support.requires_resource('cpu') -class TestExt(unittest.TestCase): +class BaseTests: + TEST_INTERNAL_C_API = False + # Default build with no options def test_build(self): self.check_build('_test_cext') - def test_build_c11(self): - self.check_build('_test_c11_cext', std='c11') - - @unittest.skipIf(support.MS_WINDOWS, "MSVC doesn't support /std:c99") - def test_build_c99(self): - # In public docs, we say C API is compatible with C11. However, - # in practice we do maintain C99 compatibility in public headers. - # Please ask the C API WG before adding a new C11-only feature. - self.check_build('_test_c99_cext', std='c99') - - @support.requires_gil_enabled('incompatible with Free Threading') - def test_build_limited(self): - self.check_build('_test_limited_cext', limited=True) - - @support.requires_gil_enabled('broken for now with Free Threading') - def test_build_limited_c11(self): - self.check_build('_test_limited_c11_cext', limited=True, std='c11') - def check_build(self, extension_name, std=None, limited=False): venv_dir = 'env' with support.setup_venv_with_pip_setuptools(venv_dir) as python_exe: @@ -70,6 +54,7 @@ def run_cmd(operation, cmd): if limited: env['CPYTHON_TEST_LIMITED'] = '1' env['CPYTHON_TEST_EXT_NAME'] = extension_name + env['TEST_INTERNAL_C_API'] = str(int(self.TEST_INTERNAL_C_API)) if support.verbose: print('Run:', ' '.join(map(shlex.quote, cmd))) subprocess.run(cmd, check=True, env=env) @@ -110,5 +95,29 @@ def run_cmd(operation, cmd): run_cmd('Import', cmd) +class TestPublicCAPI(BaseTests, unittest.TestCase): + @support.requires_gil_enabled('incompatible with Free Threading') + def test_build_limited(self): + self.check_build('_test_limited_cext', limited=True) + + @support.requires_gil_enabled('broken for now with Free Threading') + def test_build_limited_c11(self): + self.check_build('_test_limited_c11_cext', limited=True, std='c11') + + def test_build_c11(self): + self.check_build('_test_c11_cext', std='c11') + + @unittest.skipIf(support.MS_WINDOWS, "MSVC doesn't support /std:c99") + def test_build_c99(self): + # In public docs, we say C API is compatible with C11. However, + # in practice we do maintain C99 compatibility in public headers. + # Please ask the C API WG before adding a new C11-only feature. + self.check_build('_test_c99_cext', std='c99') + + +class TestInteralCAPI(BaseTests, unittest.TestCase): + TEST_INTERNAL_C_API = True + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_cext/extension.c b/Lib/test/test_cext/extension.c index 64629c5a6da8cd..4be2f24c60d44b 100644 --- a/Lib/test/test_cext/extension.c +++ b/Lib/test/test_cext/extension.c @@ -1,11 +1,31 @@ // gh-116869: Basic C test extension to check that the Python C API // does not emit C compiler warnings. +// +// Test also the internal C API if the TEST_INTERNAL_C_API macro is defined. // Always enable assertions #undef NDEBUG +#ifdef TEST_INTERNAL_C_API +# define Py_BUILD_CORE_MODULE 1 +#endif + #include "Python.h" +#ifdef TEST_INTERNAL_C_API + // gh-135906: Check for compiler warnings in the internal C API. + // - Cython uses pycore_frame.h. + // - greenlet uses pycore_frame.h, pycore_interpframe_structs.h and + // pycore_interpframe.h. +# include "internal/pycore_frame.h" +# include "internal/pycore_gc.h" +# include "internal/pycore_interp.h" +# include "internal/pycore_interpframe.h" +# include "internal/pycore_interpframe_structs.h" +# include "internal/pycore_object.h" +# include "internal/pycore_pystate.h" +#endif + #ifndef MODULE_NAME # error "MODULE_NAME macro must be defined" #endif diff --git a/Lib/test/test_cext/setup.py b/Lib/test/test_cext/setup.py index 1275282983f7ff..587585e8086e92 100644 --- a/Lib/test/test_cext/setup.py +++ b/Lib/test/test_cext/setup.py @@ -14,10 +14,15 @@ if not support.MS_WINDOWS: # C compiler flags for GCC and clang - CFLAGS = [ + BASE_CFLAGS = [ # The purpose of test_cext extension is to check that building a C # extension using the Python C API does not emit C compiler warnings. '-Werror', + ] + + # C compiler flags for GCC and clang + PUBLIC_CFLAGS = [ + *BASE_CFLAGS, # gh-120593: Check the 'const' qualifier '-Wcast-qual', @@ -26,27 +31,40 @@ '-pedantic-errors', ] if not support.Py_GIL_DISABLED: - CFLAGS.append( + PUBLIC_CFLAGS.append( # gh-116869: The Python C API must be compatible with building # with the -Werror=declaration-after-statement compiler flag. '-Werror=declaration-after-statement', ) + INTERNAL_CFLAGS = [*BASE_CFLAGS] else: # MSVC compiler flags - CFLAGS = [ - # Display warnings level 1 to 4 - '/W4', + BASE_CFLAGS = [ # Treat all compiler warnings as compiler errors '/WX', ] + PUBLIC_CFLAGS = [ + *BASE_CFLAGS, + # Display warnings level 1 to 4 + '/W4', + ] + INTERNAL_CFLAGS = [ + *BASE_CFLAGS, + # Display warnings level 1 to 3 + '/W3', + ] def main(): std = os.environ.get("CPYTHON_TEST_STD", "") module_name = os.environ["CPYTHON_TEST_EXT_NAME"] limited = bool(os.environ.get("CPYTHON_TEST_LIMITED", "")) + internal = bool(int(os.environ.get("TEST_INTERNAL_C_API", "0"))) - cflags = list(CFLAGS) + if not internal: + cflags = list(PUBLIC_CFLAGS) + else: + cflags = list(INTERNAL_CFLAGS) cflags.append(f'-DMODULE_NAME={module_name}') # Add -std=STD or /std:STD (MSVC) compiler flag @@ -75,6 +93,9 @@ def main(): version = sys.hexversion cflags.append(f'-DPy_LIMITED_API={version:#x}') + if internal: + cflags.append('-DTEST_INTERNAL_C_API=1') + # On Windows, add PCbuild\amd64\ to include and library directories include_dirs = [] library_dirs = [] diff --git a/Lib/test/test_cppext/__init__.py b/Lib/test/test_cppext/__init__.py index 2b7adac4bccd15..2f54b3ccb35cc4 100644 --- a/Lib/test/test_cppext/__init__.py +++ b/Lib/test/test_cppext/__init__.py @@ -24,7 +24,7 @@ @support.requires_venv_with_pip() @support.requires_subprocess() @support.requires_resource('cpu') -class TestCPPExt(unittest.TestCase): +class BaseTests: def test_build(self): self.check_build('_testcppext') @@ -34,10 +34,6 @@ def test_build_cpp03(self): # Please ask the C API WG before adding a new C++11-only feature. self.check_build('_testcpp03ext', std='c++03') - @support.requires_gil_enabled('incompatible with Free Threading') - def test_build_limited_cpp03(self): - self.check_build('_test_limited_cpp03ext', std='c++03', limited=True) - @unittest.skipIf(support.MS_WINDOWS, "MSVC doesn't support /std:c++11") def test_build_cpp11(self): self.check_build('_testcpp11ext', std='c++11') @@ -48,10 +44,6 @@ def test_build_cpp11(self): def test_build_cpp14(self): self.check_build('_testcpp14ext', std='c++14') - @support.requires_gil_enabled('incompatible with Free Threading') - def test_build_limited(self): - self.check_build('_testcppext_limited', limited=True) - def check_build(self, extension_name, std=None, limited=False): venv_dir = 'env' with support.setup_venv_with_pip_setuptools(venv_dir) as python_exe: @@ -111,5 +103,19 @@ def run_cmd(operation, cmd): run_cmd('Import', cmd) +class TestPublicCAPI(BaseTests, unittest.TestCase): + @support.requires_gil_enabled('incompatible with Free Threading') + def test_build_limited_cpp03(self): + self.check_build('_test_limited_cpp03ext', std='c++03', limited=True) + + @support.requires_gil_enabled('incompatible with Free Threading') + def test_build_limited(self): + self.check_build('_testcppext_limited', limited=True) + + +class TestInteralCAPI(BaseTests, unittest.TestCase): + TEST_INTERNAL_C_API = True + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_cppext/extension.cpp b/Lib/test/test_cppext/extension.cpp index 5b3571b295bec3..1affa176088d57 100644 --- a/Lib/test/test_cppext/extension.cpp +++ b/Lib/test/test_cppext/extension.cpp @@ -6,8 +6,17 @@ // Always enable assertions #undef NDEBUG +#ifdef TEST_INTERNAL_C_API +# define Py_BUILD_CORE 1 +#endif + #include "Python.h" +#ifdef TEST_INTERNAL_C_API + // gh-135906: Check for compiler warnings in the internal C API +# include "internal/pycore_frame.h" +#endif + #ifndef MODULE_NAME # error "MODULE_NAME macro must be defined" #endif diff --git a/Lib/test/test_cppext/setup.py b/Lib/test/test_cppext/setup.py index ea1ed64bf7ab0a..98442b106b6113 100644 --- a/Lib/test/test_cppext/setup.py +++ b/Lib/test/test_cppext/setup.py @@ -47,6 +47,7 @@ def main(): std = os.environ.get("CPYTHON_TEST_CPP_STD", "") module_name = os.environ["CPYTHON_TEST_EXT_NAME"] limited = bool(os.environ.get("CPYTHON_TEST_LIMITED", "")) + internal = bool(int(os.environ.get("TEST_INTERNAL_C_API", "0"))) cppflags = list(CPPFLAGS) cppflags.append(f'-DMODULE_NAME={module_name}') @@ -82,6 +83,9 @@ def main(): version = sys.hexversion cppflags.append(f'-DPy_LIMITED_API={version:#x}') + if internal: + cppflags.append('-DTEST_INTERNAL_C_API=1') + # On Windows, add PCbuild\amd64\ to include and library directories include_dirs = [] library_dirs = [] From 252e2f710ea376a38c4545dd758e03d331c1eaad Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Sat, 12 Jul 2025 01:03:13 +0800 Subject: [PATCH 856/989] gh-101100: Fix sphinx warnings in Doc/library/functools.rst (GH-136424) Add index entries and anchors for cache_info, cache_clear and register. --- Doc/library/functools.rst | 33 +++++++++++++++++++++------------ Doc/tools/.nitignore | 1 - 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 3e75621be6dad3..beec9b942afc0f 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -199,12 +199,18 @@ The :mod:`functools` module defines the following functions: and *typed*. This is for information purposes only. Mutating the values has no effect. + .. method:: lru_cache.cache_info() + :no-typesetting: + To help measure the effectiveness of the cache and tune the *maxsize* - parameter, the wrapped function is instrumented with a :func:`cache_info` + parameter, the wrapped function is instrumented with a :func:`!cache_info` function that returns a :term:`named tuple` showing *hits*, *misses*, *maxsize* and *currsize*. - The decorator also provides a :func:`cache_clear` function for clearing or + .. method:: lru_cache.cache_clear() + :no-typesetting: + + The decorator also provides a :func:`!cache_clear` function for clearing or invalidating the cache. The original underlying function is accessible through the @@ -284,9 +290,9 @@ The :mod:`functools` module defines the following functions: class decorator supplies the rest. This simplifies the effort involved in specifying all of the possible rich comparison operations: - The class must define one of :meth:`__lt__`, :meth:`__le__`, - :meth:`__gt__`, or :meth:`__ge__`. - In addition, the class should supply an :meth:`__eq__` method. + The class must define one of :meth:`~object.__lt__`, :meth:`~object.__le__`, + :meth:`~object.__gt__`, or :meth:`~object.__ge__`. + In addition, the class should supply an :meth:`~object.__eq__` method. For example:: @@ -418,7 +424,7 @@ The :mod:`functools` module defines the following functions: like normal functions, are handled as descriptors). When *func* is a descriptor (such as a normal Python function, - :func:`classmethod`, :func:`staticmethod`, :func:`abstractmethod` or + :func:`classmethod`, :func:`staticmethod`, :func:`~abc.abstractmethod` or another instance of :class:`partialmethod`), calls to ``__get__`` are delegated to the underlying descriptor, and an appropriate :ref:`partial object` returned as the result. @@ -499,7 +505,10 @@ The :mod:`functools` module defines the following functions: ... print("Let me just say,", end=" ") ... print(arg) - To add overloaded implementations to the function, use the :func:`register` + .. method:: singledispatch.register() + :no-typesetting: + + To add overloaded implementations to the function, use the :func:`!register` attribute of the generic function, which can be used as a decorator. For functions annotated with types, the decorator will infer the type of the first argument automatically:: @@ -565,14 +574,14 @@ The :mod:`functools` module defines the following functions: runtime impact. To enable registering :term:`lambdas` and pre-existing functions, - the :func:`register` attribute can also be used in a functional form:: + the :func:`~singledispatch.register` attribute can also be used in a functional form:: >>> def nothing(arg, verbose=False): ... print("Nothing.") ... >>> fun.register(type(None), nothing) - The :func:`register` attribute returns the undecorated function. This + The :func:`~singledispatch.register` attribute returns the undecorated function. This enables decorator stacking, :mod:`pickling`, and the creation of unit tests for each variant independently:: @@ -650,10 +659,10 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.4 .. versionchanged:: 3.7 - The :func:`register` attribute now supports using type annotations. + The :func:`~singledispatch.register` attribute now supports using type annotations. .. versionchanged:: 3.11 - The :func:`register` attribute now supports + The :func:`~singledispatch.register` attribute now supports :class:`typing.Union` as a type annotation. @@ -783,7 +792,7 @@ The :mod:`functools` module defines the following functions: 'Docstring' Without the use of this decorator factory, the name of the example function - would have been ``'wrapper'``, and the docstring of the original :func:`example` + would have been ``'wrapper'``, and the docstring of the original :func:`!example` would have been lost. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 897b404b8d6bb3..1fbb45ecd73801 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -15,7 +15,6 @@ Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/email.charset.rst -Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst Doc/library/importlib.rst From 561212a03306cb2f246771e9c4748bec1dc41405 Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Sat, 12 Jul 2025 02:18:47 +0800 Subject: [PATCH 857/989] Doc: More duplicate word fixes (GH-136299) --- Include/internal/mimalloc/mimalloc/types.h | 2 +- Lib/test/test_argparse.py | 2 +- Lib/test/test_dictcomps.py | 2 +- Lib/test/test_setcomps.py | 2 +- Lib/test/test_zipfile/_path/test_path.py | 2 +- Misc/NEWS.d/3.10.0a3.rst | 2 +- Misc/NEWS.d/3.12.0a5.rst | 2 +- Misc/NEWS.d/3.13.0a1.rst | 2 +- Misc/NEWS.d/3.14.0b1.rst | 2 +- Modules/_asynciomodule.c | 2 +- Objects/typeobject.c | 2 +- Python/preconfig.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index a17f637fe68236..19e93224174314 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -481,7 +481,7 @@ typedef struct mi_segment_s { struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 08ff41368d9bb0..ddd48b1bc0c56f 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -1829,7 +1829,7 @@ def test_r_1_replace(self): class StdStreamComparer: def __init__(self, attr): # We try to use the actual stdXXX.buffer attribute as our - # marker, but but under some test environments, + # marker, but under some test environments, # sys.stdout/err are replaced by io.StringIO which won't have .buffer, # so we use a sentinel simply to show that the tests do the right thing # for any buffer supporting object diff --git a/Lib/test/test_dictcomps.py b/Lib/test/test_dictcomps.py index 26b56dac5032fa..a7a46216787437 100644 --- a/Lib/test/test_dictcomps.py +++ b/Lib/test/test_dictcomps.py @@ -132,7 +132,7 @@ def test_star_expression(self): def test_exception_locations(self): # The location of an exception raised from __init__ or - # __next__ should should be the iterator expression + # __next__ should be the iterator expression def init_raises(): try: {x:x for x in BrokenIter(init_raises=True)} diff --git a/Lib/test/test_setcomps.py b/Lib/test/test_setcomps.py index 0bb02ef11f6b4b..6fc5bb74036c5f 100644 --- a/Lib/test/test_setcomps.py +++ b/Lib/test/test_setcomps.py @@ -154,7 +154,7 @@ class SetComprehensionTest(unittest.TestCase): def test_exception_locations(self): # The location of an exception raised from __init__ or - # __next__ should should be the iterator expression + # __next__ should be the iterator expression def init_raises(): try: diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index 696134023a56b9..958a586b0dc8e8 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -316,7 +316,7 @@ def test_mutability(self, alpharep): HUGE_ZIPFILE_NUM_ENTRIES = 2**13 def huge_zipfile(self): - """Create a read-only zipfile with a huge number of entries entries.""" + """Create a read-only zipfile with a huge number of entries.""" strm = io.BytesIO() zf = zipfile.ZipFile(strm, "w") for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)): diff --git a/Misc/NEWS.d/3.10.0a3.rst b/Misc/NEWS.d/3.10.0a3.rst index 3f3fb7ec599e57..6cf3db3eb43c8b 100644 --- a/Misc/NEWS.d/3.10.0a3.rst +++ b/Misc/NEWS.d/3.10.0a3.rst @@ -394,7 +394,7 @@ Removed the ``formatter`` module, which was deprecated in Python 3.4. It is somewhat obsolete, little used, and not tested. It was originally scheduled to be removed in Python 3.6, but such removals were delayed until after Python 2.7 EOL. Existing users should copy whatever classes they use into -their code. Patch by Donghee Na and and Terry J. Reedy. +their code. Patch by Donghee Na and Terry J. Reedy. .. diff --git a/Misc/NEWS.d/3.12.0a5.rst b/Misc/NEWS.d/3.12.0a5.rst index 5dc443bb55b617..b73bbfbfdc4819 100644 --- a/Misc/NEWS.d/3.12.0a5.rst +++ b/Misc/NEWS.d/3.12.0a5.rst @@ -253,7 +253,7 @@ Adapt the ``_elementtree`` extension module to multi-phase init .. section: Library Avoid potential unexpected ``freeaddrinfo`` call (double free) in -:mod:`socket` when when a libc ``getaddrinfo()`` implementation leaves +:mod:`socket` when a libc ``getaddrinfo()`` implementation leaves garbage in an output pointer when returning an error. Original patch by Sergey G. Brester. diff --git a/Misc/NEWS.d/3.13.0a1.rst b/Misc/NEWS.d/3.13.0a1.rst index 0a93cbcea0ffd2..0741eab4eca6a4 100644 --- a/Misc/NEWS.d/3.13.0a1.rst +++ b/Misc/NEWS.d/3.13.0a1.rst @@ -153,7 +153,7 @@ about a 10% improvement. .. section: Core and Builtins Guard ``assert(tstate->thread_id > 0)`` with ``#ifndef HAVE_PTHREAD_STUBS``. -This allows for for pydebug builds to work under WASI which (currently) +This allows for pydebug builds to work under WASI which (currently) lacks thread support. .. diff --git a/Misc/NEWS.d/3.14.0b1.rst b/Misc/NEWS.d/3.14.0b1.rst index 5847dea7d5e8ee..041fbaf2051719 100644 --- a/Misc/NEWS.d/3.14.0b1.rst +++ b/Misc/NEWS.d/3.14.0b1.rst @@ -1051,7 +1051,7 @@ warning filtering state if the :data:`sys.flags.context_aware_warnings` flag is set to true. This makes using the context manager thread-safe in multi-threaded programs. The flag is true by default in free-threaded builds and is otherwise false. The value of the flag can be overridden by -the the :option:`-X context_aware_warnings <-X>` command-line option or by +the :option:`-X context_aware_warnings <-X>` command-line option or by the :envvar:`PYTHON_CONTEXT_AWARE_WARNINGS` environment variable. .. diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 5f9181395c4828..99408e60721c60 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -821,7 +821,7 @@ future_add_done_callback(asyncio_state *state, FutureObj *fut, PyObject *arg, Invariants: * callbacks != NULL: - There are some callbacks in in the list. Just + There are some callbacks in the list. Just add the new callback to it. * callbacks == NULL and callback0 == NULL: diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e84278d13c3e9c..379c4d0467c487 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -225,7 +225,7 @@ type_from_ref(PyObject *ref) } -/* helpers for for static builtin types */ +/* helpers for static builtin types */ #ifndef NDEBUG static inline int diff --git a/Python/preconfig.c b/Python/preconfig.c index 5b26c75de8b3a0..67b2d2f2dc186d 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -700,7 +700,7 @@ preconfig_init_coerce_c_locale(PyPreConfig *config) /* Test if coerce_c_locale equals to -1 or equals to 1: PYTHONCOERCECLOCALE=1 doesn't imply that the C locale is always coerced. - It is only coerced if if the LC_CTYPE locale is "C". */ + It is only coerced if the LC_CTYPE locale is "C". */ if (config->coerce_c_locale < 0 || config->coerce_c_locale == 1) { /* The C locale enables the C locale coercion (PEP 538) */ if (_Py_LegacyLocaleDetected(0)) { From 7f1e66ae0e7c6bfa26cc128291d8705abd1a3c93 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 11 Jul 2025 12:36:17 -0700 Subject: [PATCH 858/989] Minor edit: Improve comment readability and ordering (gh-136557) --- Lib/collections/__init__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index d2ddc1cd9ec2ea..b8653f40a942f0 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -776,23 +776,26 @@ def __repr__(self): # When the multiplicities are all zero or one, multiset operations # are guaranteed to be equivalent to the corresponding operations # for regular sets. + # # Given counter multisets such as: # cp = Counter(a=1, b=0, c=1) # cq = Counter(c=1, d=0, e=1) + # # The corresponding regular sets would be: # sp = {'a', 'c'} # sq = {'c', 'e'} + # # All of the following relations would hold: - # set(cp + cq) == sp | sq - # set(cp - cq) == sp - sq - # set(cp | cq) == sp | sq - # set(cp & cq) == sp & sq # (cp == cq) == (sp == sq) # (cp != cq) == (sp != sq) # (cp <= cq) == (sp <= sq) # (cp < cq) == (sp < sq) # (cp >= cq) == (sp >= sq) # (cp > cq) == (sp > sq) + # set(cp + cq) == sp | sq + # set(cp - cq) == sp - sq + # set(cp | cq) == sp | sq + # set(cp & cq) == sp & sq def __eq__(self, other): 'True if all counts agree. Missing counts are treated as zero.' From db47f4d844acf2b6e52e44f7f3d5f7566b1e402c Mon Sep 17 00:00:00 2001 From: Will Childs-Klein Date: Fri, 11 Jul 2025 18:24:11 -0400 Subject: [PATCH 859/989] gh-135401: Test AWS-LC as a cryptography library in CI (GH-135402) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Zachary Ware --- .github/workflows/build.yml | 86 +++++++++++++++++- .github/workflows/posix-deps-apt.sh | 1 + ...-06-11-16-52-49.gh-issue-135401.ccMXmL.rst | 1 + Tools/ssl/multissltests.py | 88 +++++++++++++------ configure | 2 +- configure.ac | 2 +- 6 files changed, 147 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-11-16-52-49.gh-issue-135401.ccMXmL.rst diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6171571857af6..05f20e12f4653d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,7 +260,7 @@ jobs: free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} - build-ubuntu-ssltests: + build-ubuntu-ssltests-openssl: name: 'Ubuntu SSL tests with OpenSSL' runs-on: ${{ matrix.os }} timeout-minutes: 60 @@ -322,6 +322,81 @@ jobs: - name: SSL tests run: ./python Lib/test/ssltests.py + build-ubuntu-ssltests-awslc: + name: 'Ubuntu SSL tests with AWS-LC' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + awslc_ver: [1.55.0] + env: + AWSLC_VER: ${{ matrix.awslc_ver}} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure SSL lib env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore AWS-LC build' + id: cache-aws-lc + uses: actions/cache@v4 + with: + path: ./multissl/aws-lc/${{ matrix.awslc_ver }} + key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} + - name: Install AWS-LC + if: steps.cache-aws-lc.outputs.cache-hit != 'true' + run: | + python3 Tools/ssl/multissltests.py \ + --steps=library \ + --base-directory "$MULTISSL_DIR" \ + --awslc ${{ matrix.awslc_ver }} \ + --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: | + ./configure CFLAGS="-fdiagnostics-format=json" \ + --config-cache \ + --enable-slower-safety \ + --with-pydebug \ + --with-openssl="$OPENSSL_DIR" \ + --with-builtin-hashlib-hashes=blake2 \ + --with-ssl-default-suites=openssl + - name: Build CPython + run: make -j + - name: Display build info + run: make pythoninfo + - name: Verify python is linked to AWS-LC + run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC + - name: SSL tests + run: ./python Lib/test/ssltests.py + build-wasi: name: 'WASI' needs: build-context @@ -628,7 +703,8 @@ jobs: - build-windows-msi - build-macos - build-ubuntu - - build-ubuntu-ssltests + - build-ubuntu-ssltests-awslc + - build-ubuntu-ssltests-openssl - build-wasi - test-hypothesis - build-asan @@ -643,7 +719,8 @@ jobs: with: allowed-failures: >- build-windows-msi, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, test-hypothesis, cifuzz, allowed-skips: >- @@ -661,7 +738,8 @@ jobs: check-generated-files, build-macos, build-ubuntu, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, build-wasi, test-hypothesis, build-asan, diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 44e6a9ce2d0cd1..0b64367e6c4562 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -5,6 +5,7 @@ apt-get -yq install \ build-essential \ pkg-config \ ccache \ + cmake \ gdb \ lcov \ libb2-dev \ diff --git a/Misc/NEWS.d/next/Tests/2025-06-11-16-52-49.gh-issue-135401.ccMXmL.rst b/Misc/NEWS.d/next/Tests/2025-06-11-16-52-49.gh-issue-135401.ccMXmL.rst new file mode 100644 index 00000000000000..6885fba30dbab0 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-11-16-52-49.gh-issue-135401.ccMXmL.rst @@ -0,0 +1 @@ +Add a new GitHub CI job to test the :mod:`ssl` module with `AWS-LC `_ as the backing cryptography and TLS library. diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py index b1a5df91901fc6..f4c8fde8346fd9 100755 --- a/Tools/ssl/multissltests.py +++ b/Tools/ssl/multissltests.py @@ -1,12 +1,12 @@ #!./python -"""Run Python tests against multiple installations of OpenSSL and LibreSSL +"""Run Python tests against multiple installations of cryptography libraries The script - (1) downloads OpenSSL / LibreSSL tar bundle + (1) downloads the tar bundle (2) extracts it to ./src - (3) compiles OpenSSL / LibreSSL - (4) installs OpenSSL / LibreSSL into ../multissl/$LIB/$VERSION/ + (3) compiles the relevant library + (4) installs that library into ../multissl/$LIB/$VERSION/ (5) forces a recompilation of Python modules using the header and library files from ../multissl/$LIB/$VERSION/ (6) runs Python's test suite @@ -61,6 +61,10 @@ LIBRESSL_RECENT_VERSIONS = [ ] +AWSLC_RECENT_VERSIONS = [ + "1.55.0", +] + # store files in ../multissl HERE = os.path.dirname(os.path.abspath(__file__)) PYTHONROOT = os.path.abspath(os.path.join(HERE, '..', '..')) @@ -70,9 +74,9 @@ parser = argparse.ArgumentParser( prog='multissl', description=( - "Run CPython tests with multiple OpenSSL and LibreSSL " + "Run CPython tests with multiple cryptography libraries" "versions." - ) + ), ) parser.add_argument( '--debug', @@ -102,6 +106,14 @@ "OpenSSL and LibreSSL versions are given." ).format(LIBRESSL_RECENT_VERSIONS, LIBRESSL_OLD_VERSIONS) ) +parser.add_argument( + '--awslc', + nargs='+', + default=(), + help=( + "AWS-LC versions, defaults to '{}' if no crypto library versions are given." + ).format(AWSLC_RECENT_VERSIONS) +) parser.add_argument( '--tests', nargs='*', @@ -111,7 +123,7 @@ parser.add_argument( '--base-directory', default=MULTISSL_DIR, - help="Base directory for OpenSSL / LibreSSL sources and builds." + help="Base directory for crypto library sources and builds." ) parser.add_argument( '--no-network', @@ -124,8 +136,8 @@ choices=['library', 'modules', 'tests'], default='tests', help=( - "Which steps to perform. 'library' downloads and compiles OpenSSL " - "or LibreSSL. 'module' also compiles Python modules. 'tests' builds " + "Which steps to perform. 'library' downloads and compiles a crypto" + "library. 'module' also compiles Python modules. 'tests' builds " "all and runs the test suite." ) ) @@ -453,6 +465,34 @@ class BuildLibreSSL(AbstractBuilder): build_template = "libressl-{}" +class BuildAWSLC(AbstractBuilder): + library = "AWS-LC" + url_templates = ( + "https://github.com/aws/aws-lc/archive/refs/tags/v{v}.tar.gz", + ) + src_template = "aws-lc-{}.tar.gz" + build_template = "aws-lc-{}" + + def _build_src(self, config_args=()): + cwd = self.build_dir + log.info("Running build in {}".format(cwd)) + env = os.environ.copy() + env["LD_RUN_PATH"] = self.lib_dir # set rpath + if self.system: + env['SYSTEM'] = self.system + cmd = [ + "cmake", + "-DCMAKE_BUILD_TYPE=RelWithDebInfo", + "-DCMAKE_PREFIX_PATH={}".format(self.install_dir), + "-DCMAKE_INSTALL_PREFIX={}".format(self.install_dir), + "-DBUILD_SHARED_LIBS=ON", + "-DBUILD_TESTING=OFF", + "-DFIPS=OFF", + ] + self._subprocess_call(cmd, cwd=cwd, env=env) + self._subprocess_call(["make", "-j{}".format(self.jobs)], cwd=cwd, env=env) + + def configure_make(): if not os.path.isfile('Makefile'): log.info('Running ./configure') @@ -467,9 +507,10 @@ def configure_make(): def main(): args = parser.parse_args() - if not args.openssl and not args.libressl: + if not args.openssl and not args.libressl and not args.awslc: args.openssl = list(OPENSSL_RECENT_VERSIONS) args.libressl = list(LIBRESSL_RECENT_VERSIONS) + args.awslc = list(AWSLC_RECENT_VERSIONS) if not args.disable_ancient: args.openssl.extend(OPENSSL_OLD_VERSIONS) args.libressl.extend(LIBRESSL_OLD_VERSIONS) @@ -496,22 +537,15 @@ def main(): # download and register builder builds = [] - - for version in args.openssl: - build = BuildOpenSSL( - version, - args - ) - build.install() - builds.append(build) - - for version in args.libressl: - build = BuildLibreSSL( - version, - args - ) - build.install() - builds.append(build) + for build_class, versions in [ + (BuildOpenSSL, args.openssl), + (BuildLibreSSL, args.libressl), + (BuildAWSLC, args.awslc), + ]: + for version in versions: + build = build_class(version, args) + build.install() + builds.append(build) if args.steps in {'modules', 'tests'}: for build in builds: @@ -539,7 +573,7 @@ def main(): else: print('Executed all SSL tests.') - print('OpenSSL / LibreSSL versions:') + print('OpenSSL / LibreSSL / AWS-LC versions:') for build in builds: print(" * {0.library} {0.version}".format(build)) diff --git a/configure b/configure index 94a0b810333ce9..4292f33ce21dce 100755 --- a/configure +++ b/configure @@ -30848,8 +30848,8 @@ main (void) OBJ_nid2sn(NID_md5); OBJ_nid2sn(NID_sha1); + OBJ_nid2sn(NID_sha512); OBJ_nid2sn(NID_sha3_512); - OBJ_nid2sn(NID_blake2b512); EVP_PBE_scrypt(NULL, 0, NULL, 0, 2, 8, 1, 0, NULL, 0); ; diff --git a/configure.ac b/configure.ac index ade71bc011eb87..cc7a6e9397dded 100644 --- a/configure.ac +++ b/configure.ac @@ -7529,8 +7529,8 @@ WITH_SAVE_ENV([ ], [ OBJ_nid2sn(NID_md5); OBJ_nid2sn(NID_sha1); + OBJ_nid2sn(NID_sha512); OBJ_nid2sn(NID_sha3_512); - OBJ_nid2sn(NID_blake2b512); EVP_PBE_scrypt(NULL, 0, NULL, 0, 2, 8, 1, 0, NULL, 0); ])], [ac_cv_working_openssl_hashlib=yes], [ac_cv_working_openssl_hashlib=no]) ]) From c7d24b81c376b0cf0b34f861cb18c1b1e4eac27b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 12 Jul 2025 09:55:12 +0200 Subject: [PATCH 860/989] gh-111506: Add _Py_OPAQUE_PYOBJECT to hide PyObject layout & related API (GH-136505) Allow Py_LIMITED_API for (Py_GIL_DISABLED && _Py_OPAQUE_PYOBJECT) API that's removed when _Py_OPAQUE_PYOBJECT is defined: - PyObject_HEAD - _PyObject_EXTRA_INIT - PyObject_HEAD_INIT - PyObject_VAR_HEAD - struct _object (i.e. PyObject) (opaque) - struct PyVarObject (opaque) - Py_SIZE - Py_SET_TYPE - Py_SET_SIZE - PyModuleDef_Base (opaque) - PyModuleDef_HEAD_INIT - PyModuleDef (opaque) - _Py_IsImmortal - _Py_IsStaticImmortal Note that the `_Py_IsImmortal` removal (and a few other issues) means _Py_OPAQUE_PYOBJECT only works with limited API 3.14+ now. Co-authored-by: Victor Stinner --- Include/Python.h | 22 +++++++++---------- Include/moduleobject.h | 5 +++++ Include/object.h | 24 +++++++++++++++++---- Include/refcount.h | 2 ++ Lib/test/test_cext/__init__.py | 25 ++++++++++++++++----- Lib/test/test_cext/create_moduledef.c | 29 +++++++++++++++++++++++++ Lib/test/test_cext/extension.c | 31 +++++++++++++++++++++++---- Lib/test/test_cext/setup.py | 10 ++++++++- 8 files changed, 123 insertions(+), 25 deletions(-) create mode 100644 Lib/test/test_cext/create_moduledef.c diff --git a/Include/Python.h b/Include/Python.h index 64be80145890a3..19417df698c8e7 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -45,19 +45,19 @@ # endif #endif -// gh-111506: The free-threaded build is not compatible with the limited API -// or the stable ABI. -#if defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) -# error "The limited API is not currently supported in the free-threaded build" -#endif +#if defined(Py_GIL_DISABLED) +# if defined(Py_LIMITED_API) && !defined(_Py_OPAQUE_PYOBJECT) +# error "Py_LIMITED_API is not currently supported in the free-threaded build" +# endif -#if defined(Py_GIL_DISABLED) && defined(_MSC_VER) -# include // __readgsqword() -#endif +# if defined(_MSC_VER) +# include // __readgsqword() +# endif -#if defined(Py_GIL_DISABLED) && defined(__MINGW32__) -# include // __readgsqword() -#endif +# if defined(__MINGW32__) +# include // __readgsqword() +# endif +#endif // Py_GIL_DISABLED // Include Python header files #include "pyport.h" diff --git a/Include/moduleobject.h b/Include/moduleobject.h index 2a17c891dda811..17634a93f8fa6f 100644 --- a/Include/moduleobject.h +++ b/Include/moduleobject.h @@ -36,6 +36,7 @@ PyAPI_FUNC(PyObject *) PyModuleDef_Init(PyModuleDef*); PyAPI_DATA(PyTypeObject) PyModuleDef_Type; #endif +#ifndef _Py_OPAQUE_PYOBJECT typedef struct PyModuleDef_Base { PyObject_HEAD /* The function used to re-initialize the module. @@ -63,6 +64,7 @@ typedef struct PyModuleDef_Base { 0, /* m_index */ \ _Py_NULL, /* m_copy */ \ } +#endif // _Py_OPAQUE_PYOBJECT #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 /* New in 3.5 */ @@ -104,6 +106,8 @@ struct PyModuleDef_Slot { PyAPI_FUNC(int) PyUnstable_Module_SetGIL(PyObject *module, void *gil); #endif + +#ifndef _Py_OPAQUE_PYOBJECT struct PyModuleDef { PyModuleDef_Base m_base; const char* m_name; @@ -115,6 +119,7 @@ struct PyModuleDef { inquiry m_clear; freefunc m_free; }; +#endif // _Py_OPAQUE_PYOBJECT #ifdef __cplusplus } diff --git a/Include/object.h b/Include/object.h index c75e9db0cbd935..b1bcc9481871b4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -56,6 +56,11 @@ whose size is determined when the object is allocated. # define Py_REF_DEBUG #endif +#if defined(_Py_OPAQUE_PYOBJECT) && !defined(Py_LIMITED_API) +# error "_Py_OPAQUE_PYOBJECT only makes sense with Py_LIMITED_API" +#endif + +#ifndef _Py_OPAQUE_PYOBJECT /* PyObject_HEAD defines the initial segment of every PyObject. */ #define PyObject_HEAD PyObject ob_base; @@ -99,6 +104,8 @@ whose size is determined when the object is allocated. * not necessarily a byte count. */ #define PyObject_VAR_HEAD PyVarObject ob_base; +#endif // !defined(_Py_OPAQUE_PYOBJECT) + #define Py_INVALID_SIZE (Py_ssize_t)-1 /* PyObjects are given a minimum alignment so that the least significant bits @@ -112,7 +119,9 @@ whose size is determined when the object is allocated. * by hand. Similarly every pointer to a variable-size Python object can, * in addition, be cast to PyVarObject*. */ -#ifndef Py_GIL_DISABLED +#ifdef _Py_OPAQUE_PYOBJECT + /* PyObject is opaque */ +#elif !defined(Py_GIL_DISABLED) struct _object { #if (defined(__GNUC__) || defined(__clang__)) \ && !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L) @@ -168,15 +177,18 @@ struct _object { Py_ssize_t ob_ref_shared; // shared (atomic) reference count PyTypeObject *ob_type; }; -#endif +#endif // !defined(_Py_OPAQUE_PYOBJECT) /* Cast argument to PyObject* type. */ #define _PyObject_CAST(op) _Py_CAST(PyObject*, (op)) -typedef struct { +#ifndef _Py_OPAQUE_PYOBJECT +struct PyVarObject { PyObject ob_base; Py_ssize_t ob_size; /* Number of items in variable part */ -} PyVarObject; +}; +#endif +typedef struct PyVarObject PyVarObject; /* Cast argument to PyVarObject* type. */ #define _PyVarObject_CAST(op) _Py_CAST(PyVarObject*, (op)) @@ -286,6 +298,7 @@ PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); PyAPI_DATA(PyTypeObject) PyLong_Type; PyAPI_DATA(PyTypeObject) PyBool_Type; +#ifndef _Py_OPAQUE_PYOBJECT // bpo-39573: The Py_SET_SIZE() function must be used to set an object size. static inline Py_ssize_t Py_SIZE(PyObject *ob) { assert(Py_TYPE(ob) != &PyLong_Type); @@ -295,6 +308,7 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) { #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob)) #endif +#endif // !defined(_Py_OPAQUE_PYOBJECT) static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { return Py_TYPE(ob) == type; @@ -304,6 +318,7 @@ static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { #endif +#ifndef _Py_OPAQUE_PYOBJECT static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { ob->ob_type = type; } @@ -323,6 +338,7 @@ static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) #endif +#endif // !defined(_Py_OPAQUE_PYOBJECT) /* diff --git a/Include/refcount.h b/Include/refcount.h index 457972b6dcfd9f..ba34461fefcbb0 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -117,6 +117,7 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); #endif #endif +#ifndef _Py_OPAQUE_PYOBJECT static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) { #if defined(Py_GIL_DISABLED) @@ -140,6 +141,7 @@ static inline Py_ALWAYS_INLINE int _Py_IsStaticImmortal(PyObject *op) #endif } #define _Py_IsStaticImmortal(op) _Py_IsStaticImmortal(_PyObject_CAST(op)) +#endif // !defined(_Py_OPAQUE_PYOBJECT) // Py_SET_REFCNT() implementation for stable ABI PyAPI_FUNC(void) _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt); diff --git a/Lib/test/test_cext/__init__.py b/Lib/test/test_cext/__init__.py index 93e7b2043d397a..fb93c6ccbb637d 100644 --- a/Lib/test/test_cext/__init__.py +++ b/Lib/test/test_cext/__init__.py @@ -12,7 +12,10 @@ from test import support -SOURCE = os.path.join(os.path.dirname(__file__), 'extension.c') +SOURCES = [ + os.path.join(os.path.dirname(__file__), 'extension.c'), + os.path.join(os.path.dirname(__file__), 'create_moduledef.c'), +] SETUP = os.path.join(os.path.dirname(__file__), 'setup.py') @@ -35,17 +38,22 @@ class BaseTests: def test_build(self): self.check_build('_test_cext') - def check_build(self, extension_name, std=None, limited=False): + def check_build(self, extension_name, std=None, limited=False, + opaque_pyobject=False): venv_dir = 'env' with support.setup_venv_with_pip_setuptools(venv_dir) as python_exe: self._check_build(extension_name, python_exe, - std=std, limited=limited) + std=std, limited=limited, + opaque_pyobject=opaque_pyobject) - def _check_build(self, extension_name, python_exe, std, limited): + def _check_build(self, extension_name, python_exe, std, limited, + opaque_pyobject): pkg_dir = 'pkg' os.mkdir(pkg_dir) shutil.copy(SETUP, os.path.join(pkg_dir, os.path.basename(SETUP))) - shutil.copy(SOURCE, os.path.join(pkg_dir, os.path.basename(SOURCE))) + for source in SOURCES: + dest = os.path.join(pkg_dir, os.path.basename(source)) + shutil.copy(source, dest) def run_cmd(operation, cmd): env = os.environ.copy() @@ -53,6 +61,8 @@ def run_cmd(operation, cmd): env['CPYTHON_TEST_STD'] = std if limited: env['CPYTHON_TEST_LIMITED'] = '1' + if opaque_pyobject: + env['CPYTHON_TEST_OPAQUE_PYOBJECT'] = '1' env['CPYTHON_TEST_EXT_NAME'] = extension_name env['TEST_INTERNAL_C_API'] = str(int(self.TEST_INTERNAL_C_API)) if support.verbose: @@ -107,6 +117,11 @@ def test_build_limited_c11(self): def test_build_c11(self): self.check_build('_test_c11_cext', std='c11') + def test_build_opaque_pyobject(self): + # Test with _Py_OPAQUE_PYOBJECT + self.check_build('_test_limited_opaque_cext', limited=True, + opaque_pyobject=True) + @unittest.skipIf(support.MS_WINDOWS, "MSVC doesn't support /std:c99") def test_build_c99(self): # In public docs, we say C API is compatible with C11. However, diff --git a/Lib/test/test_cext/create_moduledef.c b/Lib/test/test_cext/create_moduledef.c new file mode 100644 index 00000000000000..249c3163552c03 --- /dev/null +++ b/Lib/test/test_cext/create_moduledef.c @@ -0,0 +1,29 @@ +// Workaround for testing _Py_OPAQUE_PYOBJECT. +// See end of 'extension.c' + + +#undef _Py_OPAQUE_PYOBJECT +#undef Py_LIMITED_API +#include "Python.h" + + +// (repeated definition to avoid creating a header) +extern PyObject *testcext_create_moduledef( + const char *name, const char *doc, + PyMethodDef *methods, PyModuleDef_Slot *slots); + +PyObject *testcext_create_moduledef( + const char *name, const char *doc, + PyMethodDef *methods, PyModuleDef_Slot *slots) { + + static struct PyModuleDef _testcext_module = { + PyModuleDef_HEAD_INIT, + }; + if (!_testcext_module.m_name) { + _testcext_module.m_name = name; + _testcext_module.m_doc = doc; + _testcext_module.m_methods = methods; + _testcext_module.m_slots = slots; + } + return PyModuleDef_Init(&_testcext_module); +} diff --git a/Lib/test/test_cext/extension.c b/Lib/test/test_cext/extension.c index 4be2f24c60d44b..73fc67ae59d18f 100644 --- a/Lib/test/test_cext/extension.c +++ b/Lib/test/test_cext/extension.c @@ -78,6 +78,9 @@ _testcext_exec( return 0; } +#define _FUNC_NAME(NAME) PyInit_ ## NAME +#define FUNC_NAME(NAME) _FUNC_NAME(NAME) + // Converting from function pointer to void* has undefined behavior, but // works on all known platforms, and CPython's module and type slots currently // need it. @@ -96,9 +99,10 @@ static PyModuleDef_Slot _testcext_slots[] = { _Py_COMP_DIAG_POP - PyDoc_STRVAR(_testcext_doc, "C test extension."); +#ifndef _Py_OPAQUE_PYOBJECT + static struct PyModuleDef _testcext_module = { PyModuleDef_HEAD_INIT, // m_base STR(MODULE_NAME), // m_name @@ -112,11 +116,30 @@ static struct PyModuleDef _testcext_module = { }; -#define _FUNC_NAME(NAME) PyInit_ ## NAME -#define FUNC_NAME(NAME) _FUNC_NAME(NAME) - PyMODINIT_FUNC FUNC_NAME(MODULE_NAME)(void) { return PyModuleDef_Init(&_testcext_module); } + +#else // _Py_OPAQUE_PYOBJECT + +// Opaque PyObject means that PyModuleDef is also opaque and cannot be +// declared statically. See PEP 793. +// So, this part of module creation is split into a separate source file +// which uses non-limited API. + +// (repeated definition to avoid creating a header) +extern PyObject *testcext_create_moduledef( + const char *name, const char *doc, + PyMethodDef *methods, PyModuleDef_Slot *slots); + + +PyMODINIT_FUNC +FUNC_NAME(MODULE_NAME)(void) +{ + return testcext_create_moduledef( + STR(MODULE_NAME), _testcext_doc, _testcext_methods, _testcext_slots); +} + +#endif // _Py_OPAQUE_PYOBJECT diff --git a/Lib/test/test_cext/setup.py b/Lib/test/test_cext/setup.py index 587585e8086e92..4d71e4751f7afd 100644 --- a/Lib/test/test_cext/setup.py +++ b/Lib/test/test_cext/setup.py @@ -59,8 +59,11 @@ def main(): std = os.environ.get("CPYTHON_TEST_STD", "") module_name = os.environ["CPYTHON_TEST_EXT_NAME"] limited = bool(os.environ.get("CPYTHON_TEST_LIMITED", "")) + opaque_pyobject = bool(os.environ.get("CPYTHON_TEST_OPAQUE_PYOBJECT", "")) internal = bool(int(os.environ.get("TEST_INTERNAL_C_API", "0"))) + sources = [SOURCE] + if not internal: cflags = list(PUBLIC_CFLAGS) else: @@ -93,6 +96,11 @@ def main(): version = sys.hexversion cflags.append(f'-DPy_LIMITED_API={version:#x}') + # Define _Py_OPAQUE_PYOBJECT macro + if opaque_pyobject: + cflags.append(f'-D_Py_OPAQUE_PYOBJECT') + sources.append('create_moduledef.c') + if internal: cflags.append('-DTEST_INTERNAL_C_API=1') @@ -120,7 +128,7 @@ def main(): ext = Extension( module_name, - sources=[SOURCE], + sources=sources, extra_compile_args=cflags, include_dirs=include_dirs, library_dirs=library_dirs) From 83d04a29a64eedc55d0a8d93aaae43d6069729e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 12 Jul 2025 13:02:27 +0200 Subject: [PATCH 861/989] gh-136565: Improve and amend `hashlib.__doc__` (#136566) --- Lib/hashlib.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 0e9bd98aa1fc31..6d4f84a5a6e9ff 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -2,7 +2,7 @@ # Licensed to PSF under a Contributor Agreement. # -__doc__ = """hashlib module - A common interface to many hash functions. +__doc__ = r"""hashlib module - A common interface to many hash functions. new(name, data=b'', **kwargs) - returns a new hash object implementing the given hash function; initializing the hash @@ -12,7 +12,7 @@ than using new(name): md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(), -sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256. +sha3_224(), sha3_256(), sha3_384(), sha3_512(), shake_128(), and shake_256(). More algorithms may be available on your platform but the above are guaranteed to exist. See the algorithms_guaranteed and algorithms_available attributes @@ -21,8 +21,8 @@ NOTE: If you want the adler32 or crc32 hash functions they are available in the zlib module. -Choose your hash function wisely. Some have known collision weaknesses. -sha384 and sha512 will be slow on 32 bit platforms. +Choose your hash function wisely. Some have known collision weaknesses, +while others may be slower depending on the CPU architecture. Hash objects have these methods: - update(data): Update the hash object with the bytes in data. Repeated calls @@ -36,20 +36,20 @@ efficiently compute the digests of data that share a common initial substring. -For example, to obtain the digest of the byte string 'Nobody inspects the -spammish repetition': +Assuming that Python has been built with MD5 support, the following computes +the MD5 digest of the byte string b'Nobody inspects the spammish repetition': >>> import hashlib >>> m = hashlib.md5() >>> m.update(b"Nobody inspects") >>> m.update(b" the spammish repetition") >>> m.digest() - b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + b'\xbbd\x9c\x83\xdd\x1e\xa5\xc9\xd9\xde\xc9\xa1\x8d\xf0\xff\xe9' More condensed: - >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() - 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + >>> hashlib.md5(b"Nobody inspects the spammish repetition").hexdigest() + 'bb649c83dd1ea5c9d9dec9a18df0ffe9' """ @@ -203,7 +203,7 @@ def file_digest(fileobj, digest, /, *, _bufsize=2**18): *digest* must either be a hash algorithm name as a *str*, a hash constructor, or a callable that returns a hash object. """ - # On Linux we could use AF_ALG sockets and sendfile() to archive zero-copy + # On Linux we could use AF_ALG sockets and sendfile() to achieve zero-copy # hashing with hardware acceleration. if isinstance(digest, str): digestobj = new(digest) From 2301cdb5596ebc8ded56b948c92c1f5fd29ba92a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 12 Jul 2025 13:31:10 +0200 Subject: [PATCH 862/989] gh-135853: add `math.fmax` and `math.fmin` (#135888) --- Doc/library/math.rst | 26 +++++ Doc/whatsnew/3.15.rst | 2 +- Lib/test/test_math.py | 87 ++++++++++++++ ...-06-24-13-30-47.gh-issue-135853.7ejTvK.rst | 2 + Modules/clinic/mathmodule.c.h | 108 +++++++++++++++++- Modules/mathmodule.c | 36 ++++++ 6 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-24-13-30-47.gh-issue-135853.7ejTvK.rst diff --git a/Doc/library/math.rst b/Doc/library/math.rst index bf7a00549fc6e6..55f2de07553d56 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -42,6 +42,8 @@ noted otherwise, all return values are floats. :func:`fabs(x) ` Absolute value of *x* :func:`floor(x) ` Floor of *x*, the largest integer less than or equal to *x* :func:`fma(x, y, z) ` Fused multiply-add operation: ``(x * y) + z`` +:func:`fmax(x, y) ` Maximum of two floating-point values +:func:`fmin(x, y) ` Minimum of two floating-point values :func:`fmod(x, y) ` Remainder of division ``x / y`` :func:`modf(x) ` Fractional and integer parts of *x* :func:`remainder(x, y) ` Remainder of *x* with respect to *y* @@ -248,6 +250,30 @@ Floating point arithmetic .. versionadded:: 3.13 +.. function:: fmax(x, y) + + Get the larger of two floating-point values, treating NaNs as missing data. + + When both operands are (signed) NaNs or zeroes, return ``nan`` and ``0`` + respectively and the sign of the result is implementation-defined, that + is, :func:`!fmax` is not required to be sensitive to the sign of such + operands (see Annex F of the C11 standard, §F.10.0.3 and §F.10.9.2). + + .. versionadded:: next + + +.. function:: fmin(x, y) + + Get the smaller of two floating-point values, treating NaNs as missing data. + + When both operands are (signed) NaNs or zeroes, return ``nan`` and ``0`` + respectively and the sign of the result is implementation-defined, that + is, :func:`!fmin` is not required to be sensitive to the sign of such + operands (see Annex F of the C11 standard, §F.10.0.3 and §F.10.9.3). + + .. versionadded:: next + + .. function:: fmod(x, y) Return the floating-point remainder of ``x / y``, diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e9b88458acdd79..010abb7d9b9278 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -212,7 +212,7 @@ math * Add :func:`math.isnormal` and :func:`math.issubnormal` functions. (Contributed by Sergey B Kirpichev in :gh:`132908`.) -* Add :func:`math.signbit` function. +* Add :func:`math.fmax`, :func:`math.fmin` and :func:`math.signbit` functions. (Contributed by Bénédikt Tran in :gh:`135853`.) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 46cb54647b1968..e3b0d4fa9eeeb3 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -17,6 +17,7 @@ eps = 1E-05 NAN = float('nan') +NNAN = float('-nan') INF = float('inf') NINF = float('-inf') FLOAT_MAX = sys.float_info.max @@ -636,6 +637,92 @@ def testFmod(self): self.assertEqual(math.fmod(0.0, NINF), 0.0) self.assertRaises(ValueError, math.fmod, INF, INF) + def test_fmax(self): + self.assertRaises(TypeError, math.fmax) + self.assertRaises(TypeError, math.fmax, 'x', 'y') + + self.assertEqual(math.fmax(0., 0.), 0.) + self.assertEqual(math.fmax(1., 2.), 2.) + self.assertEqual(math.fmax(2., 1.), 2.) + + self.assertEqual(math.fmax(+1., +0.), 1.) + self.assertEqual(math.fmax(+0., +1.), 1.) + self.assertEqual(math.fmax(+1., -0.), 1.) + self.assertEqual(math.fmax(-0., +1.), 1.) + + self.assertEqual(math.fmax(-1., +0.), 0.) + self.assertEqual(math.fmax(+0., -1.), 0.) + self.assertEqual(math.fmax(-1., -0.), 0.) + self.assertEqual(math.fmax(-0., -1.), 0.) + + for x in [NINF, -1., -0., 0., 1., INF]: + self.assertFalse(math.isnan(x)) + + with self.subTest(x=x, is_negative=math.copysign(1, x) < 0): + self.assertEqual(math.fmax(INF, x), INF) + self.assertEqual(math.fmax(x, INF), INF) + self.assertEqual(math.fmax(NINF, x), x) + self.assertEqual(math.fmax(x, NINF), x) + + @requires_IEEE_754 + def test_fmax_nans(self): + # When exactly one operand is NaN, the other is returned. + for x in [NINF, -1., -0., 0., 1., INF]: + with self.subTest(x=x, is_negative=math.copysign(1, x) < 0): + self.assertFalse(math.isnan(math.fmax(NAN, x))) + self.assertFalse(math.isnan(math.fmax(x, NAN))) + self.assertFalse(math.isnan(math.fmax(NNAN, x))) + self.assertFalse(math.isnan(math.fmax(x, NNAN))) + # When both operands are NaNs, fmax() returns NaN (see C11, F.10.9.2) + # whose sign is implementation-defined (see C11, F.10.0.3). + self.assertTrue(math.isnan(math.fmax(NAN, NAN))) + self.assertTrue(math.isnan(math.fmax(NNAN, NNAN))) + self.assertTrue(math.isnan(math.fmax(NAN, NNAN))) + self.assertTrue(math.isnan(math.fmax(NNAN, NAN))) + + def test_fmin(self): + self.assertRaises(TypeError, math.fmin) + self.assertRaises(TypeError, math.fmin, 'x', 'y') + + self.assertEqual(math.fmin(0., 0.), 0.) + self.assertEqual(math.fmin(1., 2.), 1.) + self.assertEqual(math.fmin(2., 1.), 1.) + + self.assertEqual(math.fmin(+1., +0.), 0.) + self.assertEqual(math.fmin(+0., +1.), 0.) + self.assertEqual(math.fmin(+1., -0.), 0.) + self.assertEqual(math.fmin(-0., +1.), 0.) + + self.assertEqual(math.fmin(-1., +0.), -1.) + self.assertEqual(math.fmin(+0., -1.), -1.) + self.assertEqual(math.fmin(-1., -0.), -1.) + self.assertEqual(math.fmin(-0., -1.), -1.) + + for x in [NINF, -1., -0., 0., 1., INF]: + self.assertFalse(math.isnan(x)) + + with self.subTest(x=x, is_negative=math.copysign(1, x) < 0): + self.assertEqual(math.fmin(INF, x), x) + self.assertEqual(math.fmin(x, INF), x) + self.assertEqual(math.fmin(NINF, x), NINF) + self.assertEqual(math.fmin(x, NINF), NINF) + + @requires_IEEE_754 + def test_fmin_nans(self): + # When exactly one operand is NaN, the other is returned. + for x in [NINF, -1., -0., 0., 1., INF]: + with self.subTest(x=x, is_negative=math.copysign(1, x) < 0): + self.assertFalse(math.isnan(math.fmin(NAN, x))) + self.assertFalse(math.isnan(math.fmin(x, NAN))) + self.assertFalse(math.isnan(math.fmin(NNAN, x))) + self.assertFalse(math.isnan(math.fmin(x, NNAN))) + # When both operands are NaNs, fmin() returns NaN (see C11, F.10.9.3) + # whose sign is implementation-defined (see C11, F.10.0.3). + self.assertTrue(math.isnan(math.fmin(NAN, NAN))) + self.assertTrue(math.isnan(math.fmin(NNAN, NNAN))) + self.assertTrue(math.isnan(math.fmin(NAN, NNAN))) + self.assertTrue(math.isnan(math.fmin(NNAN, NAN))) + def testFrexp(self): self.assertRaises(TypeError, math.frexp) diff --git a/Misc/NEWS.d/next/Library/2025-06-24-13-30-47.gh-issue-135853.7ejTvK.rst b/Misc/NEWS.d/next/Library/2025-06-24-13-30-47.gh-issue-135853.7ejTvK.rst new file mode 100644 index 00000000000000..240ea72c69fa6c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-24-13-30-47.gh-issue-135853.7ejTvK.rst @@ -0,0 +1,2 @@ +Add :func:`math.fmax` and :func:`math.fmin` to get the larger and smaller of +two floating-point values. Patch by Bénédikt Tran. diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index a443c48faaa88a..246019f2206028 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -84,6 +84,112 @@ PyDoc_STRVAR(math_floor__doc__, #define MATH_FLOOR_METHODDEF \ {"floor", (PyCFunction)math_floor, METH_O, math_floor__doc__}, +PyDoc_STRVAR(math_fmax__doc__, +"fmax($module, x, y, /)\n" +"--\n" +"\n" +"Return the larger of two floating-point arguments."); + +#define MATH_FMAX_METHODDEF \ + {"fmax", _PyCFunction_CAST(math_fmax), METH_FASTCALL, math_fmax__doc__}, + +static double +math_fmax_impl(PyObject *module, double x, double y); + +static PyObject * +math_fmax(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + double x; + double y; + double _return_value; + + if (!_PyArg_CheckPositional("fmax", nargs, 2, 2)) { + goto exit; + } + if (PyFloat_CheckExact(args[0])) { + x = PyFloat_AS_DOUBLE(args[0]); + } + else + { + x = PyFloat_AsDouble(args[0]); + if (x == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + if (PyFloat_CheckExact(args[1])) { + y = PyFloat_AS_DOUBLE(args[1]); + } + else + { + y = PyFloat_AsDouble(args[1]); + if (y == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + _return_value = math_fmax_impl(module, x, y); + if ((_return_value == -1.0) && PyErr_Occurred()) { + goto exit; + } + return_value = PyFloat_FromDouble(_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(math_fmin__doc__, +"fmin($module, x, y, /)\n" +"--\n" +"\n" +"Return the smaller of two floating-point arguments."); + +#define MATH_FMIN_METHODDEF \ + {"fmin", _PyCFunction_CAST(math_fmin), METH_FASTCALL, math_fmin__doc__}, + +static double +math_fmin_impl(PyObject *module, double x, double y); + +static PyObject * +math_fmin(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + double x; + double y; + double _return_value; + + if (!_PyArg_CheckPositional("fmin", nargs, 2, 2)) { + goto exit; + } + if (PyFloat_CheckExact(args[0])) { + x = PyFloat_AS_DOUBLE(args[0]); + } + else + { + x = PyFloat_AsDouble(args[0]); + if (x == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + if (PyFloat_CheckExact(args[1])) { + y = PyFloat_AS_DOUBLE(args[1]); + } + else + { + y = PyFloat_AsDouble(args[1]); + if (y == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + _return_value = math_fmin_impl(module, x, y); + if ((_return_value == -1.0) && PyErr_Occurred()) { + goto exit; + } + return_value = PyFloat_FromDouble(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(math_signbit__doc__, "signbit($module, x, /)\n" "--\n" @@ -1212,4 +1318,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=4e3fa94d026f027b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4fb180d4c25ff8fa input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 033de0b2907a69..7c2a421dd6a450 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -1214,6 +1214,40 @@ math_floor(PyObject *module, PyObject *number) return PyLong_FromDouble(floor(x)); } +/*[clinic input] +math.fmax -> double + + x: double + y: double + / + +Return the larger of two floating-point arguments. +[clinic start generated code]*/ + +static double +math_fmax_impl(PyObject *module, double x, double y) +/*[clinic end generated code: output=00692358d312fee2 input=021596c027336ffe]*/ +{ + return fmax(x, y); +} + +/*[clinic input] +math.fmin -> double + + x: double + y: double + / + +Return the smaller of two floating-point arguments. +[clinic start generated code]*/ + +static double +math_fmin_impl(PyObject *module, double x, double y) +/*[clinic end generated code: output=3d5b7826bd292dd9 input=d12e64ccc33f878a]*/ +{ + return fmin(x, y); +} + FUNC1AD(gamma, m_tgamma, "gamma($module, x, /)\n--\n\n" "Gamma function at x.", @@ -4192,7 +4226,9 @@ static PyMethodDef math_methods[] = { MATH_FACTORIAL_METHODDEF MATH_FLOOR_METHODDEF MATH_FMA_METHODDEF + MATH_FMAX_METHODDEF MATH_FMOD_METHODDEF + MATH_FMIN_METHODDEF MATH_FREXP_METHODDEF MATH_FSUM_METHODDEF {"gamma", math_gamma, METH_O, math_gamma_doc}, From c564847e98db462edfc30a971da061eeb775e475 Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Sat, 12 Jul 2025 20:06:15 +0800 Subject: [PATCH 863/989] gh-89083: Add CLI tests for `UUIDv{6,7,8}` (#136548) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_uuid.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 7ddacf07a2c789..0e1a723ce3a151 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1140,6 +1140,23 @@ def test_uuid_weakref(self): weak = weakref.ref(strong) self.assertIs(strong, weak()) + +class CommandLineTestCases: + uuid = None # to be defined in subclasses + + def do_test_standalone_uuid(self, version): + stdout = io.StringIO() + with contextlib.redirect_stdout(stdout): + self.uuid.main() + output = stdout.getvalue().strip() + u = self.uuid.UUID(output) + self.assertEqual(output, str(u)) + self.assertEqual(u.version, version) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid1"]) + def test_cli_uuid1(self): + self.do_test_standalone_uuid(1) + @mock.patch.object(sys, "argv", ["", "-u", "uuid3", "-n", "@dns"]) @mock.patch('sys.stderr', new_callable=io.StringIO) def test_cli_namespace_required_for_uuid3(self, mock_err): @@ -1214,13 +1231,25 @@ def test_cli_uuid5_ouputted_with_valid_namespace_and_name(self): self.assertEqual(output, str(uuid_output)) self.assertEqual(uuid_output.version, 5) + @mock.patch.object(sys, "argv", ["", "-u", "uuid6"]) + def test_cli_uuid6(self): + self.do_test_standalone_uuid(6) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid7"]) + def test_cli_uuid7(self): + self.do_test_standalone_uuid(7) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid8"]) + def test_cli_uuid8(self): + self.do_test_standalone_uuid(8) + -class TestUUIDWithoutExtModule(BaseTestUUID, unittest.TestCase): +class TestUUIDWithoutExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = py_uuid @unittest.skipUnless(c_uuid, 'requires the C _uuid module') -class TestUUIDWithExtModule(BaseTestUUID, unittest.TestCase): +class TestUUIDWithExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = c_uuid def check_has_stable_libuuid_extractable_node(self): From dda70fa77125f189be038e2883b28ec4ea2d6e73 Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Sat, 12 Jul 2025 15:27:32 +0300 Subject: [PATCH 864/989] gh-99631: Add custom `loads` and `dumps` support for the `shelve` module (#118065) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Pieter Eendebak Co-authored-by: Petr Viktorin Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/shelve.rst | 70 +++++- Lib/shelve.py | 64 +++-- Lib/test/test_shelve.py | 236 +++++++++++++++++- ...4-07-16-00-01-04.gh-issue-99631.GWD4fD.rst | 2 + 4 files changed, 336 insertions(+), 36 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-16-00-01-04.gh-issue-99631.GWD4fD.rst diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 23a2e0c3d0c758..23808619524056 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -17,7 +17,8 @@ This includes most class instances, recursive data types, and objects containing lots of shared sub-objects. The keys are ordinary strings. -.. function:: open(filename, flag='c', protocol=None, writeback=False) +.. function:: open(filename, flag='c', protocol=None, writeback=False, *, \ + serializer=None, deserializer=None) Open a persistent dictionary. The filename specified is the base filename for the underlying database. As a side-effect, an extension may be added to the @@ -41,6 +42,21 @@ lots of shared sub-objects. The keys are ordinary strings. determine which accessed entries are mutable, nor which ones were actually mutated). + By default, :mod:`shelve` uses :func:`pickle.dumps` and :func:`pickle.loads` + for serializing and deserializing. This can be changed by supplying + *serializer* and *deserializer*, respectively. + + The *serializer* argument must be a callable which takes an object ``obj`` + and the *protocol* as inputs and returns the representation ``obj`` as a + :term:`bytes-like object`; the *protocol* value may be ignored by the + serializer. + + The *deserializer* argument must be callable which takes a serialized object + given as a :class:`bytes` object and returns the corresponding object. + + A :exc:`ShelveError` is raised if *serializer* is given but *deserializer* + is not, or vice-versa. + .. versionchanged:: 3.10 :const:`pickle.DEFAULT_PROTOCOL` is now used as the default pickle protocol. @@ -48,6 +64,10 @@ lots of shared sub-objects. The keys are ordinary strings. .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. + .. versionchanged:: next + Accepts custom *serializer* and *deserializer* functions in place of + :func:`pickle.dumps` and :func:`pickle.loads`. + .. note:: Do not rely on the shelf being closed automatically; always call @@ -129,7 +149,8 @@ Restrictions explicitly. -.. class:: Shelf(dict, protocol=None, writeback=False, keyencoding='utf-8') +.. class:: Shelf(dict, protocol=None, writeback=False, \ + keyencoding='utf-8', *, serializer=None, deserializer=None) A subclass of :class:`collections.abc.MutableMapping` which stores pickled values in the *dict* object. @@ -147,6 +168,9 @@ Restrictions The *keyencoding* parameter is the encoding used to encode keys before they are used with the underlying dict. + The *serializer* and *deserializer* parameters have the same interpretation + as in :func:`~shelve.open`. + A :class:`Shelf` object can also be used as a context manager, in which case it will be automatically closed when the :keyword:`with` block ends. @@ -161,8 +185,13 @@ Restrictions :const:`pickle.DEFAULT_PROTOCOL` is now used as the default pickle protocol. + .. versionchanged:: next + Added the *serializer* and *deserializer* parameters. -.. class:: BsdDbShelf(dict, protocol=None, writeback=False, keyencoding='utf-8') + +.. class:: BsdDbShelf(dict, protocol=None, writeback=False, \ + keyencoding='utf-8', *, \ + serializer=None, deserializer=None) A subclass of :class:`Shelf` which exposes :meth:`!first`, :meth:`!next`, :meth:`!previous`, :meth:`!last` and :meth:`!set_location` methods. @@ -172,18 +201,27 @@ Restrictions modules. The *dict* object passed to the constructor must support those methods. This is generally accomplished by calling one of :func:`!bsddb.hashopen`, :func:`!bsddb.btopen` or :func:`!bsddb.rnopen`. The - optional *protocol*, *writeback*, and *keyencoding* parameters have the same - interpretation as for the :class:`Shelf` class. + optional *protocol*, *writeback*, *keyencoding*, *serializer* and *deserializer* + parameters have the same interpretation as in :func:`~shelve.open`. + + .. versionchanged:: next + Added the *serializer* and *deserializer* parameters. -.. class:: DbfilenameShelf(filename, flag='c', protocol=None, writeback=False) +.. class:: DbfilenameShelf(filename, flag='c', protocol=None, \ + writeback=False, *, serializer=None, \ + deserializer=None) A subclass of :class:`Shelf` which accepts a *filename* instead of a dict-like object. The underlying file will be opened using :func:`dbm.open`. By default, the file will be created and opened for both read and write. The - optional *flag* parameter has the same interpretation as for the :func:`.open` - function. The optional *protocol* and *writeback* parameters have the same - interpretation as for the :class:`Shelf` class. + optional *flag* parameter has the same interpretation as for the + :func:`.open` function. The optional *protocol*, *writeback*, *serializer* + and *deserializer* parameters have the same interpretation as in + :func:`~shelve.open`. + + .. versionchanged:: next + Added the *serializer* and *deserializer* parameters. .. _shelve-example: @@ -225,6 +263,20 @@ object):: d.close() # close it +Exceptions +---------- + +.. exception:: ShelveError + + Exception raised when one of the arguments *deserializer* and *serializer* + is missing in the :func:`~shelve.open`, :class:`Shelf`, :class:`BsdDbShelf` + and :class:`DbfilenameShelf`. + + The *deserializer* and *serializer* arguments must be given together. + + .. versionadded:: next + + .. seealso:: Module :mod:`dbm` diff --git a/Lib/shelve.py b/Lib/shelve.py index b53dc8b7a8ece9..1010be1e09d702 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -56,12 +56,17 @@ the persistent dictionary on disk, if feasible). """ -from pickle import DEFAULT_PROTOCOL, Pickler, Unpickler +from pickle import DEFAULT_PROTOCOL, dumps, loads from io import BytesIO import collections.abc -__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"] +__all__ = ["ShelveError", "Shelf", "BsdDbShelf", "DbfilenameShelf", "open"] + + +class ShelveError(Exception): + pass + class _ClosedDict(collections.abc.MutableMapping): 'Marker for a closed dict. Access attempts raise a ValueError.' @@ -82,7 +87,7 @@ class Shelf(collections.abc.MutableMapping): """ def __init__(self, dict, protocol=None, writeback=False, - keyencoding="utf-8"): + keyencoding="utf-8", *, serializer=None, deserializer=None): self.dict = dict if protocol is None: protocol = DEFAULT_PROTOCOL @@ -91,6 +96,16 @@ def __init__(self, dict, protocol=None, writeback=False, self.cache = {} self.keyencoding = keyencoding + if serializer is None and deserializer is None: + self.serializer = dumps + self.deserializer = loads + elif (serializer is None) ^ (deserializer is None): + raise ShelveError("serializer and deserializer must be " + "defined together") + else: + self.serializer = serializer + self.deserializer = deserializer + def __iter__(self): for k in self.dict.keys(): yield k.decode(self.keyencoding) @@ -110,8 +125,8 @@ def __getitem__(self, key): try: value = self.cache[key] except KeyError: - f = BytesIO(self.dict[key.encode(self.keyencoding)]) - value = Unpickler(f).load() + f = self.dict[key.encode(self.keyencoding)] + value = self.deserializer(f) if self.writeback: self.cache[key] = value return value @@ -119,10 +134,8 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.writeback: self.cache[key] = value - f = BytesIO() - p = Pickler(f, self._protocol) - p.dump(value) - self.dict[key.encode(self.keyencoding)] = f.getvalue() + serialized_value = self.serializer(value, self._protocol) + self.dict[key.encode(self.keyencoding)] = serialized_value def __delitem__(self, key): del self.dict[key.encode(self.keyencoding)] @@ -191,33 +204,29 @@ class BsdDbShelf(Shelf): """ def __init__(self, dict, protocol=None, writeback=False, - keyencoding="utf-8"): - Shelf.__init__(self, dict, protocol, writeback, keyencoding) + keyencoding="utf-8", *, serializer=None, deserializer=None): + Shelf.__init__(self, dict, protocol, writeback, keyencoding, + serializer=serializer, deserializer=deserializer) def set_location(self, key): (key, value) = self.dict.set_location(key) - f = BytesIO(value) - return (key.decode(self.keyencoding), Unpickler(f).load()) + return (key.decode(self.keyencoding), self.deserializer(value)) def next(self): (key, value) = next(self.dict) - f = BytesIO(value) - return (key.decode(self.keyencoding), Unpickler(f).load()) + return (key.decode(self.keyencoding), self.deserializer(value)) def previous(self): (key, value) = self.dict.previous() - f = BytesIO(value) - return (key.decode(self.keyencoding), Unpickler(f).load()) + return (key.decode(self.keyencoding), self.deserializer(value)) def first(self): (key, value) = self.dict.first() - f = BytesIO(value) - return (key.decode(self.keyencoding), Unpickler(f).load()) + return (key.decode(self.keyencoding), self.deserializer(value)) def last(self): (key, value) = self.dict.last() - f = BytesIO(value) - return (key.decode(self.keyencoding), Unpickler(f).load()) + return (key.decode(self.keyencoding), self.deserializer(value)) class DbfilenameShelf(Shelf): @@ -227,9 +236,11 @@ class DbfilenameShelf(Shelf): See the module's __doc__ string for an overview of the interface. """ - def __init__(self, filename, flag='c', protocol=None, writeback=False): + def __init__(self, filename, flag='c', protocol=None, writeback=False, *, + serializer=None, deserializer=None): import dbm - Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback) + Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback, + serializer=serializer, deserializer=deserializer) def clear(self): """Remove all items from the shelf.""" @@ -238,8 +249,8 @@ def clear(self): self.cache.clear() self.dict.clear() - -def open(filename, flag='c', protocol=None, writeback=False): +def open(filename, flag='c', protocol=None, writeback=False, *, + serializer=None, deserializer=None): """Open a persistent dictionary for reading and writing. The filename parameter is the base filename for the underlying @@ -252,4 +263,5 @@ def open(filename, flag='c', protocol=None, writeback=False): See the module's __doc__ string for an overview of the interface. """ - return DbfilenameShelf(filename, flag, protocol, writeback) + return DbfilenameShelf(filename, flag, protocol, writeback, + serializer=serializer, deserializer=deserializer) diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index 08c6562f2a273e..64609ab9dd9a62 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -1,10 +1,11 @@ +import array import unittest import dbm import shelve import pickle import os -from test.support import os_helper +from test.support import import_helper, os_helper from collections.abc import MutableMapping from test.test_dbm import dbm_iterator @@ -165,6 +166,239 @@ def test_default_protocol(self): with shelve.Shelf({}) as s: self.assertEqual(s._protocol, pickle.DEFAULT_PROTOCOL) + def test_custom_serializer_and_deserializer(self): + os.mkdir(self.dirname) + self.addCleanup(os_helper.rmtree, self.dirname) + + def serializer(obj, protocol): + if isinstance(obj, (bytes, bytearray, str)): + if protocol == 5: + return obj + return type(obj).__name__ + elif isinstance(obj, array.array): + return obj.tobytes() + raise TypeError(f"Unsupported type for serialization: {type(obj)}") + + def deserializer(data): + if isinstance(data, (bytes, bytearray, str)): + return data.decode("utf-8") + elif isinstance(data, array.array): + return array.array("b", data) + raise TypeError( + f"Unsupported type for deserialization: {type(data)}" + ) + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto), shelve.open( + self.fn, + protocol=proto, + serializer=serializer, + deserializer=deserializer + ) as s: + bar = "bar" + bytes_data = b"Hello, world!" + bytearray_data = bytearray(b"\x00\x01\x02\x03\x04") + array_data = array.array("i", [1, 2, 3, 4, 5]) + + s["foo"] = bar + s["bytes_data"] = bytes_data + s["bytearray_data"] = bytearray_data + s["array_data"] = array_data + + if proto == 5: + self.assertEqual(s["foo"], str(bar)) + self.assertEqual(s["bytes_data"], "Hello, world!") + self.assertEqual( + s["bytearray_data"], bytearray_data.decode() + ) + self.assertEqual( + s["array_data"], array_data.tobytes().decode() + ) + else: + self.assertEqual(s["foo"], "str") + self.assertEqual(s["bytes_data"], "bytes") + self.assertEqual(s["bytearray_data"], "bytearray") + self.assertEqual( + s["array_data"], array_data.tobytes().decode() + ) + + def test_custom_incomplete_serializer_and_deserializer(self): + dbm_sqlite3 = import_helper.import_module("dbm.sqlite3") + os.mkdir(self.dirname) + self.addCleanup(os_helper.rmtree, self.dirname) + + with self.assertRaises(dbm_sqlite3.error): + def serializer(obj, protocol=None): + pass + + def deserializer(data): + return data.decode("utf-8") + + with shelve.open(self.fn, serializer=serializer, + deserializer=deserializer) as s: + s["foo"] = "bar" + + def serializer(obj, protocol=None): + return type(obj).__name__.encode("utf-8") + + def deserializer(data): + pass + + with shelve.open(self.fn, serializer=serializer, + deserializer=deserializer) as s: + s["foo"] = "bar" + self.assertNotEqual(s["foo"], "bar") + self.assertIsNone(s["foo"]) + + def test_custom_serializer_and_deserializer_bsd_db_shelf(self): + berkeleydb = import_helper.import_module("berkeleydb") + os.mkdir(self.dirname) + self.addCleanup(os_helper.rmtree, self.dirname) + + def serializer(obj, protocol=None): + data = obj.__class__.__name__ + if protocol == 5: + data = str(len(data)) + return data.encode("utf-8") + + def deserializer(data): + return data.decode("utf-8") + + def type_name_len(obj): + return f"{(len(type(obj).__name__))}" + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto), shelve.BsdDbShelf( + berkeleydb.btopen(self.fn), + protocol=proto, + serializer=serializer, + deserializer=deserializer, + ) as s: + bar = "bar" + bytes_obj = b"Hello, world!" + bytearray_obj = bytearray(b"\x00\x01\x02\x03\x04") + arr_obj = array.array("i", [1, 2, 3, 4, 5]) + + s["foo"] = bar + s["bytes_data"] = bytes_obj + s["bytearray_data"] = bytearray_obj + s["array_data"] = arr_obj + + if proto == 5: + self.assertEqual(s["foo"], type_name_len(bar)) + self.assertEqual(s["bytes_data"], type_name_len(bytes_obj)) + self.assertEqual(s["bytearray_data"], + type_name_len(bytearray_obj)) + self.assertEqual(s["array_data"], type_name_len(arr_obj)) + + k, v = s.set_location(b"foo") + self.assertEqual(k, "foo") + self.assertEqual(v, type_name_len(bar)) + + k, v = s.previous() + self.assertEqual(k, "bytes_data") + self.assertEqual(v, type_name_len(bytes_obj)) + + k, v = s.previous() + self.assertEqual(k, "bytearray_data") + self.assertEqual(v, type_name_len(bytearray_obj)) + + k, v = s.previous() + self.assertEqual(k, "array_data") + self.assertEqual(v, type_name_len(arr_obj)) + + k, v = s.next() + self.assertEqual(k, "bytearray_data") + self.assertEqual(v, type_name_len(bytearray_obj)) + + k, v = s.next() + self.assertEqual(k, "bytes_data") + self.assertEqual(v, type_name_len(bytes_obj)) + + k, v = s.first() + self.assertEqual(k, "array_data") + self.assertEqual(v, type_name_len(arr_obj)) + else: + k, v = s.set_location(b"foo") + self.assertEqual(k, "foo") + self.assertEqual(v, "str") + + k, v = s.previous() + self.assertEqual(k, "bytes_data") + self.assertEqual(v, "bytes") + + k, v = s.previous() + self.assertEqual(k, "bytearray_data") + self.assertEqual(v, "bytearray") + + k, v = s.previous() + self.assertEqual(k, "array_data") + self.assertEqual(v, "array") + + k, v = s.next() + self.assertEqual(k, "bytearray_data") + self.assertEqual(v, "bytearray") + + k, v = s.next() + self.assertEqual(k, "bytes_data") + self.assertEqual(v, "bytes") + + k, v = s.first() + self.assertEqual(k, "array_data") + self.assertEqual(v, "array") + + self.assertEqual(s["foo"], "str") + self.assertEqual(s["bytes_data"], "bytes") + self.assertEqual(s["bytearray_data"], "bytearray") + self.assertEqual(s["array_data"], "array") + + def test_custom_incomplete_serializer_and_deserializer_bsd_db_shelf(self): + berkeleydb = import_helper.import_module("berkeleydb") + os.mkdir(self.dirname) + self.addCleanup(os_helper.rmtree, self.dirname) + + def serializer(obj, protocol=None): + return type(obj).__name__.encode("utf-8") + + def deserializer(data): + pass + + with shelve.BsdDbShelf(berkeleydb.btopen(self.fn), + serializer=serializer, + deserializer=deserializer) as s: + s["foo"] = "bar" + self.assertIsNone(s["foo"]) + self.assertNotEqual(s["foo"], "bar") + + def serializer(obj, protocol=None): + pass + + def deserializer(data): + return data.decode("utf-8") + + with shelve.BsdDbShelf(berkeleydb.btopen(self.fn), + serializer=serializer, + deserializer=deserializer) as s: + s["foo"] = "bar" + self.assertEqual(s["foo"], "") + self.assertNotEqual(s["foo"], "bar") + + def test_missing_custom_deserializer(self): + def serializer(obj, protocol=None): + pass + + kwargs = dict(protocol=2, writeback=False, serializer=serializer) + self.assertRaises(shelve.ShelveError, shelve.Shelf, {}, **kwargs) + self.assertRaises(shelve.ShelveError, shelve.BsdDbShelf, {}, **kwargs) + + def test_missing_custom_serializer(self): + def deserializer(data): + pass + + kwargs = dict(protocol=2, writeback=False, deserializer=deserializer) + self.assertRaises(shelve.ShelveError, shelve.Shelf, {}, **kwargs) + self.assertRaises(shelve.ShelveError, shelve.BsdDbShelf, {}, **kwargs) + class TestShelveBase: type2test = shelve.Shelf diff --git a/Misc/NEWS.d/next/Library/2024-07-16-00-01-04.gh-issue-99631.GWD4fD.rst b/Misc/NEWS.d/next/Library/2024-07-16-00-01-04.gh-issue-99631.GWD4fD.rst new file mode 100644 index 00000000000000..735249b4dae224 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-16-00-01-04.gh-issue-99631.GWD4fD.rst @@ -0,0 +1,2 @@ +The :mod:`shelve` module now accepts custom serialization +and deserialization functions. From 5a20e7972505c6953e7c7f434f42d62cab3795f3 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Sat, 12 Jul 2025 15:42:35 +0300 Subject: [PATCH 865/989] gh-99813: Start using `SSL_sendfile` when available (#99907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Peter Bierma --- Doc/library/ssl.rst | 10 +- Lib/socket.py | 143 ++++++------ Lib/ssl.py | 27 ++- Lib/test/test_ssl.py | 23 +- ...3-03-13-22-51-40.gh-issue-99813.40TV02.rst | 4 + Modules/_ssl.c | 208 ++++++++++++++++++ Modules/clinic/_ssl.c.h | 116 +++++++++- 7 files changed, 449 insertions(+), 82 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-13-22-51-40.gh-issue-99813.40TV02.rst diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index ae2e324d0abaa4..a9930183f9a400 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -1078,8 +1078,9 @@ SSL Sockets (but passing a non-zero ``flags`` argument is not allowed) - :meth:`~socket.socket.send`, :meth:`~socket.socket.sendall` (with the same limitation) - - :meth:`~socket.socket.sendfile` (but :mod:`os.sendfile` will be used - for plain-text sockets only, else :meth:`~socket.socket.send` will be used) + - :meth:`~socket.socket.sendfile` (it may be high-performant only when + the kernel TLS is enabled by setting :data:`~ssl.OP_ENABLE_KTLS` or when a + socket is plain-text, else :meth:`~socket.socket.send` will be used) - :meth:`~socket.socket.shutdown` However, since the SSL (and TLS) protocol has its own framing atop @@ -1113,6 +1114,11 @@ SSL Sockets functions support reading and writing of data larger than 2 GB. Writing zero-length data no longer fails with a protocol violation error. + .. versionchanged:: next + Python now uses ``SSL_sendfile`` internally when possible. The + function sends a file more efficiently because it performs TLS encryption + in the kernel to avoid additional context switches. + SSL sockets also have the following additional methods and attributes: .. method:: SSLSocket.read(len=1024, buffer=None) diff --git a/Lib/socket.py b/Lib/socket.py index 727b0e75f03595..3073c012b19877 100644 --- a/Lib/socket.py +++ b/Lib/socket.py @@ -56,6 +56,7 @@ import os import sys from enum import IntEnum, IntFlag +from functools import partial try: import errno @@ -348,75 +349,83 @@ def makefile(self, mode="r", buffering=None, *, text.mode = mode return text - if hasattr(os, 'sendfile'): + def _sendfile_zerocopy(self, zerocopy_func, giveup_exc_type, file, + offset=0, count=None): + """ + Send a file using a zero-copy function. + """ + import selectors - def _sendfile_use_sendfile(self, file, offset=0, count=None): - # Lazy import to improve module import time - import selectors + self._check_sendfile_params(file, offset, count) + sockno = self.fileno() + try: + fileno = file.fileno() + except (AttributeError, io.UnsupportedOperation) as err: + raise giveup_exc_type(err) # not a regular file + try: + fsize = os.fstat(fileno).st_size + except OSError as err: + raise giveup_exc_type(err) # not a regular file + if not fsize: + return 0 # empty file + # Truncate to 1GiB to avoid OverflowError, see bpo-38319. + blocksize = min(count or fsize, 2 ** 30) + timeout = self.gettimeout() + if timeout == 0: + raise ValueError("non-blocking sockets are not supported") + # poll/select have the advantage of not requiring any + # extra file descriptor, contrarily to epoll/kqueue + # (also, they require a single syscall). + if hasattr(selectors, 'PollSelector'): + selector = selectors.PollSelector() + else: + selector = selectors.SelectSelector() + selector.register(sockno, selectors.EVENT_WRITE) - self._check_sendfile_params(file, offset, count) - sockno = self.fileno() - try: - fileno = file.fileno() - except (AttributeError, io.UnsupportedOperation) as err: - raise _GiveupOnSendfile(err) # not a regular file - try: - fsize = os.fstat(fileno).st_size - except OSError as err: - raise _GiveupOnSendfile(err) # not a regular file - if not fsize: - return 0 # empty file - # Truncate to 1GiB to avoid OverflowError, see bpo-38319. - blocksize = min(count or fsize, 2 ** 30) - timeout = self.gettimeout() - if timeout == 0: - raise ValueError("non-blocking sockets are not supported") - # poll/select have the advantage of not requiring any - # extra file descriptor, contrarily to epoll/kqueue - # (also, they require a single syscall). - if hasattr(selectors, 'PollSelector'): - selector = selectors.PollSelector() - else: - selector = selectors.SelectSelector() - selector.register(sockno, selectors.EVENT_WRITE) - - total_sent = 0 - # localize variable access to minimize overhead - selector_select = selector.select - os_sendfile = os.sendfile - try: - while True: - if timeout and not selector_select(timeout): - raise TimeoutError('timed out') - if count: - blocksize = min(count - total_sent, blocksize) - if blocksize <= 0: - break - try: - sent = os_sendfile(sockno, fileno, offset, blocksize) - except BlockingIOError: - if not timeout: - # Block until the socket is ready to send some - # data; avoids hogging CPU resources. - selector_select() - continue - except OSError as err: - if total_sent == 0: - # We can get here for different reasons, the main - # one being 'file' is not a regular mmap(2)-like - # file, in which case we'll fall back on using - # plain send(). - raise _GiveupOnSendfile(err) - raise err from None - else: - if sent == 0: - break # EOF - offset += sent - total_sent += sent - return total_sent - finally: - if total_sent > 0 and hasattr(file, 'seek'): - file.seek(offset) + total_sent = 0 + # localize variable access to minimize overhead + selector_select = selector.select + try: + while True: + if timeout and not selector_select(timeout): + raise TimeoutError('timed out') + if count: + blocksize = min(count - total_sent, blocksize) + if blocksize <= 0: + break + try: + sent = zerocopy_func(fileno, offset, blocksize) + except BlockingIOError: + if not timeout: + # Block until the socket is ready to send some + # data; avoids hogging CPU resources. + selector_select() + continue + except OSError as err: + if total_sent == 0: + # We can get here for different reasons, the main + # one being 'file' is not a regular mmap(2)-like + # file, in which case we'll fall back on using + # plain send(). + raise giveup_exc_type(err) + raise err from None + else: + if sent == 0: + break # EOF + offset += sent + total_sent += sent + return total_sent + finally: + if total_sent > 0 and hasattr(file, 'seek'): + file.seek(offset) + + if hasattr(os, 'sendfile'): + def _sendfile_use_sendfile(self, file, offset=0, count=None): + return self._sendfile_zerocopy( + partial(os.sendfile, self.fileno()), + _GiveupOnSendfile, + file, offset, count, + ) else: def _sendfile_use_sendfile(self, file, offset=0, count=None): raise _GiveupOnSendfile( diff --git a/Lib/ssl.py b/Lib/ssl.py index 7e3c4cbd6bbf8e..86fb8990636692 100644 --- a/Lib/ssl.py +++ b/Lib/ssl.py @@ -975,6 +975,10 @@ def _sslcopydoc(func): return func +class _GiveupOnSSLSendfile(Exception): + pass + + class SSLSocket(socket): """This class implements a subtype of socket.socket that wraps the underlying OS socket in an SSL context when necessary, and @@ -1266,15 +1270,26 @@ def sendall(self, data, flags=0): return super().sendall(data, flags) def sendfile(self, file, offset=0, count=None): - """Send a file, possibly by using os.sendfile() if this is a - clear-text socket. Return the total number of bytes sent. + """Send a file, possibly by using an efficient sendfile() call if + the system supports it. Return the total number of bytes sent. """ - if self._sslobj is not None: - return self._sendfile_use_send(file, offset, count) - else: - # os.sendfile() works with plain sockets only + if self._sslobj is None: return super().sendfile(file, offset, count) + if not self._sslobj.uses_ktls_for_send(): + return self._sendfile_use_send(file, offset, count) + + sendfile = getattr(self._sslobj, "sendfile", None) + if sendfile is None: + return self._sendfile_use_send(file, offset, count) + + try: + return self._sendfile_zerocopy( + sendfile, _GiveupOnSSLSendfile, file, offset, count, + ) + except _GiveupOnSSLSendfile: + return self._sendfile_use_send(file, offset, count) + def recv(self, buflen=1024, flags=0): self._checkClosed() if self._sslobj is not None: diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index f123f6ece40669..9e519537ca5ed3 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -4316,19 +4316,30 @@ def test_read_write_after_close_raises_valuerror(self): self.assertRaises(ValueError, s.write, b'hello') def test_sendfile(self): + """Try to send a file using kTLS if possible.""" TEST_DATA = b"x" * 512 with open(os_helper.TESTFN, 'wb') as f: f.write(TEST_DATA) self.addCleanup(os_helper.unlink, os_helper.TESTFN) client_context, server_context, hostname = testing_context() + client_context.options |= getattr(ssl, 'OP_ENABLE_KTLS', 0) server = ThreadedEchoServer(context=server_context, chatty=False) - with server: - with client_context.wrap_socket(socket.socket(), - server_hostname=hostname) as s: - s.connect((HOST, server.port)) + # kTLS seems to work only with a connection created before + # wrapping `sock` by the SSL context in contrast to calling + # `sock.connect()` after the wrapping. + with server, socket.create_connection((HOST, server.port)) as sock: + with client_context.wrap_socket( + sock, server_hostname=hostname + ) as ssock: + if support.verbose: + ktls_used = ssock._sslobj.uses_ktls_for_send() + print( + 'kTLS is', + 'available' if ktls_used else 'unavailable', + ) with open(os_helper.TESTFN, 'rb') as file: - s.sendfile(file) - self.assertEqual(s.recv(1024), TEST_DATA) + ssock.sendfile(file) + self.assertEqual(ssock.recv(1024), TEST_DATA) def test_session(self): client_context, server_context, hostname = testing_context() diff --git a/Misc/NEWS.d/next/Library/2023-03-13-22-51-40.gh-issue-99813.40TV02.rst b/Misc/NEWS.d/next/Library/2023-03-13-22-51-40.gh-issue-99813.40TV02.rst new file mode 100644 index 00000000000000..c511c63021442b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-13-22-51-40.gh-issue-99813.40TV02.rst @@ -0,0 +1,4 @@ +:mod:`ssl` now uses ``SSL_sendfile`` internally when it is possible (see +:data:`~ssl.OP_ENABLE_KTLS`). The function sends a file more efficiently +because it performs TLS encryption in the kernel to avoid additional context +switches. Patch by Illia Volochii. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 014e624f6c2f00..24c243e330d4bf 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -75,6 +75,33 @@ #endif +#ifdef BIO_get_ktls_send +# ifdef MS_WINDOWS +typedef long long Py_off_t; +# else +typedef off_t Py_off_t; +# endif + +static int +Py_off_t_converter(PyObject *arg, void *addr) +{ +#ifdef HAVE_LARGEFILE_SUPPORT + *((Py_off_t *)addr) = PyLong_AsLongLong(arg); +#else + *((Py_off_t *)addr) = PyLong_AsLong(arg); +#endif + return PyErr_Occurred() ? 0 : 1; +} + +/*[python input] + +class Py_off_t_converter(CConverter): + type = 'Py_off_t' + converter = 'Py_off_t_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=3fd9ca8ca6f0cbb8]*/ +#endif /* BIO_get_ktls_send */ struct py_ssl_error_code { const char *mnemonic; @@ -2442,6 +2469,184 @@ PySSL_select(PySocketSockObject *s, int writing, PyTime_t timeout) return rc == 0 ? SOCKET_HAS_TIMED_OUT : SOCKET_OPERATION_OK; } +/*[clinic input] +@critical_section +_ssl._SSLSocket.uses_ktls_for_send + +Check if the Kernel TLS data-path is used for sending. +[clinic start generated code]*/ + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_send_impl(PySSLSocket *self) +/*[clinic end generated code: output=f9d95fbefceb5068 input=8d1ce4a131190e6b]*/ +{ +#ifdef BIO_get_ktls_send + int uses = BIO_get_ktls_send(SSL_get_wbio(self->ssl)); + // BIO_get_ktls_send() returns 1 if kTLS is used and 0 if not. + // Also, it returns -1 for failure before OpenSSL 3.0.4. + return Py_NewRef(uses == 1 ? Py_True : Py_False); +#else + Py_RETURN_FALSE; +#endif +} + +/*[clinic input] +@critical_section +_ssl._SSLSocket.uses_ktls_for_recv + +Check if the Kernel TLS data-path is used for receiving. +[clinic start generated code]*/ + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_recv_impl(PySSLSocket *self) +/*[clinic end generated code: output=ce38b00317a1f681 input=a13778a924fc7d44]*/ +{ +#ifdef BIO_get_ktls_recv + int uses = BIO_get_ktls_recv(SSL_get_rbio(self->ssl)); + // BIO_get_ktls_recv() returns 1 if kTLS is used and 0 if not. + // Also, it returns -1 for failure before OpenSSL 3.0.4. + return Py_NewRef(uses == 1 ? Py_True : Py_False); +#else + Py_RETURN_FALSE; +#endif +} + +#ifdef BIO_get_ktls_send +/*[clinic input] +@critical_section +_ssl._SSLSocket.sendfile + fd: int + offset: Py_off_t + size: size_t + flags: int = 0 + / + +Write size bytes from offset in the file descriptor fd to the SSL connection. + +This method uses the zero-copy technique and returns the number of bytes +written. It should be called only when Kernel TLS is used for sending data in +the connection. + +The meaning of flags is platform dependent. +[clinic start generated code]*/ + +static PyObject * +_ssl__SSLSocket_sendfile_impl(PySSLSocket *self, int fd, Py_off_t offset, + size_t size, int flags) +/*[clinic end generated code: output=0c6815b0719ca8d5 input=dfc1b162bb020de1]*/ +{ + Py_ssize_t retval; + int sockstate; + _PySSLError err; + PySocketSockObject *sock = GET_SOCKET(self); + PyTime_t timeout, deadline = 0; + int has_timeout; + + if (sock != NULL) { + if ((PyObject *)sock == Py_None) { + _setSSLError(get_state_sock(self), + "Underlying socket connection gone", + PY_SSL_ERROR_NO_SOCKET, __FILE__, __LINE__); + return NULL; + } + Py_INCREF(sock); + /* just in case the blocking state of the socket has been changed */ + int nonblocking = (sock->sock_timeout >= 0); + BIO_set_nbio(SSL_get_rbio(self->ssl), nonblocking); + BIO_set_nbio(SSL_get_wbio(self->ssl), nonblocking); + } + + timeout = GET_SOCKET_TIMEOUT(sock); + has_timeout = (timeout > 0); + if (has_timeout) { + deadline = _PyDeadline_Init(timeout); + } + + sockstate = PySSL_select(sock, 1, timeout); + switch (sockstate) { + case SOCKET_HAS_TIMED_OUT: + PyErr_SetString(PyExc_TimeoutError, + "The write operation timed out"); + goto error; + case SOCKET_HAS_BEEN_CLOSED: + PyErr_SetString(get_state_sock(self)->PySSLErrorObject, + "Underlying socket has been closed."); + goto error; + case SOCKET_TOO_LARGE_FOR_SELECT: + PyErr_SetString(get_state_sock(self)->PySSLErrorObject, + "Underlying socket too large for select()."); + goto error; + } + + do { + PySSL_BEGIN_ALLOW_THREADS + retval = SSL_sendfile(self->ssl, fd, (off_t)offset, size, flags); + err = _PySSL_errno(retval < 0, self->ssl, (int)retval); + PySSL_END_ALLOW_THREADS + self->err = err; + + if (PyErr_CheckSignals()) { + goto error; + } + + if (has_timeout) { + timeout = _PyDeadline_Get(deadline); + } + + switch (err.ssl) { + case SSL_ERROR_WANT_READ: + sockstate = PySSL_select(sock, 0, timeout); + break; + case SSL_ERROR_WANT_WRITE: + sockstate = PySSL_select(sock, 1, timeout); + break; + default: + sockstate = SOCKET_OPERATION_OK; + break; + } + + if (sockstate == SOCKET_HAS_TIMED_OUT) { + PyErr_SetString(PyExc_TimeoutError, + "The sendfile operation timed out"); + goto error; + } + else if (sockstate == SOCKET_HAS_BEEN_CLOSED) { + PyErr_SetString(get_state_sock(self)->PySSLErrorObject, + "Underlying socket has been closed."); + goto error; + } + else if (sockstate == SOCKET_IS_NONBLOCKING) { + break; + } + } while (err.ssl == SSL_ERROR_WANT_READ + || err.ssl == SSL_ERROR_WANT_WRITE); + + if (err.ssl == SSL_ERROR_SSL + && ERR_GET_REASON(ERR_peek_error()) == SSL_R_UNINITIALIZED) + { + /* OpenSSL fails to return SSL_ERROR_SYSCALL if an error + * happens in sendfile(), and returns SSL_ERROR_SSL with + * SSL_R_UNINITIALIZED reason instead. */ + _setSSLError(get_state_sock(self), + "Some I/O error occurred in sendfile()", + PY_SSL_ERROR_SYSCALL, __FILE__, __LINE__); + goto error; + } + Py_XDECREF(sock); + if (retval < 0) { + return PySSL_SetError(self, __FILE__, __LINE__); + } + if (PySSL_ChainExceptions(self) < 0) { + return NULL; + } + return PyLong_FromSize_t(retval); +error: + Py_XDECREF(sock); + (void)PySSL_ChainExceptions(self); + return NULL; +} +#endif /* BIO_get_ktls_send */ + /*[clinic input] @critical_section _ssl._SSLSocket.write @@ -3017,6 +3222,9 @@ static PyGetSetDef ssl_getsetlist[] = { static PyMethodDef PySSLMethods[] = { _SSL__SSLSOCKET_DO_HANDSHAKE_METHODDEF + _SSL__SSLSOCKET_USES_KTLS_FOR_SEND_METHODDEF + _SSL__SSLSOCKET_USES_KTLS_FOR_RECV_METHODDEF + _SSL__SSLSOCKET_SENDFILE_METHODDEF _SSL__SSLSOCKET_WRITE_METHODDEF _SSL__SSLSOCKET_READ_METHODDEF _SSL__SSLSOCKET_PENDING_METHODDEF diff --git a/Modules/clinic/_ssl.c.h b/Modules/clinic/_ssl.c.h index c6e2abd4d93474..7027d87379283d 100644 --- a/Modules/clinic/_ssl.c.h +++ b/Modules/clinic/_ssl.c.h @@ -7,6 +7,7 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_long.h" // _PyLong_Size_t_Converter() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_ssl__SSLSocket_do_handshake__doc__, @@ -442,6 +443,115 @@ _ssl__SSLSocket_owner_set(PyObject *self, PyObject *value, void *Py_UNUSED(conte return return_value; } +PyDoc_STRVAR(_ssl__SSLSocket_uses_ktls_for_send__doc__, +"uses_ktls_for_send($self, /)\n" +"--\n" +"\n" +"Check if the Kernel TLS data-path is used for sending."); + +#define _SSL__SSLSOCKET_USES_KTLS_FOR_SEND_METHODDEF \ + {"uses_ktls_for_send", (PyCFunction)_ssl__SSLSocket_uses_ktls_for_send, METH_NOARGS, _ssl__SSLSocket_uses_ktls_for_send__doc__}, + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_send_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_send(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_uses_ktls_for_send_impl((PySSLSocket *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLSocket_uses_ktls_for_recv__doc__, +"uses_ktls_for_recv($self, /)\n" +"--\n" +"\n" +"Check if the Kernel TLS data-path is used for receiving."); + +#define _SSL__SSLSOCKET_USES_KTLS_FOR_RECV_METHODDEF \ + {"uses_ktls_for_recv", (PyCFunction)_ssl__SSLSocket_uses_ktls_for_recv, METH_NOARGS, _ssl__SSLSocket_uses_ktls_for_recv__doc__}, + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_recv_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_uses_ktls_for_recv(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_uses_ktls_for_recv_impl((PySSLSocket *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(BIO_get_ktls_send) + +PyDoc_STRVAR(_ssl__SSLSocket_sendfile__doc__, +"sendfile($self, fd, offset, size, flags=0, /)\n" +"--\n" +"\n" +"Write size bytes from offset in the file descriptor fd to the SSL connection.\n" +"\n" +"This method uses the zero-copy technique and returns the number of bytes\n" +"written. It should be called only when Kernel TLS is used for sending data in\n" +"the connection.\n" +"\n" +"The meaning of flags is platform dependent."); + +#define _SSL__SSLSOCKET_SENDFILE_METHODDEF \ + {"sendfile", _PyCFunction_CAST(_ssl__SSLSocket_sendfile), METH_FASTCALL, _ssl__SSLSocket_sendfile__doc__}, + +static PyObject * +_ssl__SSLSocket_sendfile_impl(PySSLSocket *self, int fd, Py_off_t offset, + size_t size, int flags); + +static PyObject * +_ssl__SSLSocket_sendfile(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int fd; + Py_off_t offset; + size_t size; + int flags = 0; + + if (!_PyArg_CheckPositional("sendfile", nargs, 3, 4)) { + goto exit; + } + fd = PyLong_AsInt(args[0]); + if (fd == -1 && PyErr_Occurred()) { + goto exit; + } + if (!Py_off_t_converter(args[1], &offset)) { + goto exit; + } + if (!_PyLong_Size_t_Converter(args[2], &size)) { + goto exit; + } + if (nargs < 4) { + goto skip_optional; + } + flags = PyLong_AsInt(args[3]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_sendfile_impl((PySSLSocket *)self, fd, offset, size, flags); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +#endif /* defined(BIO_get_ktls_send) */ + PyDoc_STRVAR(_ssl__SSLSocket_write__doc__, "write($self, b, /)\n" "--\n" @@ -2893,6 +3003,10 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #endif /* defined(_MSC_VER) */ +#ifndef _SSL__SSLSOCKET_SENDFILE_METHODDEF + #define _SSL__SSLSOCKET_SENDFILE_METHODDEF +#endif /* !defined(_SSL__SSLSOCKET_SENDFILE_METHODDEF) */ + #ifndef _SSL_ENUM_CERTIFICATES_METHODDEF #define _SSL_ENUM_CERTIFICATES_METHODDEF #endif /* !defined(_SSL_ENUM_CERTIFICATES_METHODDEF) */ @@ -2900,4 +3014,4 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #ifndef _SSL_ENUM_CRLS_METHODDEF #define _SSL_ENUM_CRLS_METHODDEF #endif /* !defined(_SSL_ENUM_CRLS_METHODDEF) */ -/*[clinic end generated code: output=748650909fec8906 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1adc3780d8ca682a input=a9049054013a1b77]*/ From 25335d297b5248922a4c82183bcdf0c0ada8352b Mon Sep 17 00:00:00 2001 From: Kliment Lamonov Date: Sat, 12 Jul 2025 16:30:09 +0300 Subject: [PATCH 866/989] gh-134759: fix `UnboundLocalError` in `email.message.Message.get_payload` (#136071) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/email/message.py | 2 ++ Lib/test/test_email/test_message.py | 9 +++++++++ Misc/ACKS | 1 + .../2025-06-28-11-32-57.gh-issue-134759.AjjKcG.rst | 2 ++ 4 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-06-28-11-32-57.gh-issue-134759.AjjKcG.rst diff --git a/Lib/email/message.py b/Lib/email/message.py index 41fcc2b9778798..36e4b4a9f0b773 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -313,6 +313,8 @@ def get_payload(self, i=None, decode=False): # If it does happen, turn the string into bytes in a way # guaranteed not to fail. bpayload = payload.encode('raw-unicode-escape') + else: + bpayload = payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 23c39775a8b2e5..b4128f70f18412 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1055,6 +1055,15 @@ def test_get_body_malformed(self): # AttributeError: 'str' object has no attribute 'is_attachment' m.get_body() + def test_get_bytes_payload_with_quoted_printable_encoding(self): + # We use a memoryview to avoid directly changing the private payload + # and to prevent using the dedicated paths for string or bytes objects. + payload = memoryview(b'Some payload') + m = self._make_message() + m.add_header('Content-Transfer-Encoding', 'quoted-printable') + m.set_payload(payload) + self.assertEqual(m.get_payload(decode=True), payload) + class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two diff --git a/Misc/ACKS b/Misc/ACKS index d1490e1e46ccfd..817b21259774df 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1055,6 +1055,7 @@ Alexander Lakeev David Lam Thomas Lamb Valerie Lambert +Kliment Lamonov Peter Lamut Jean-Baptiste "Jiba" Lamy Ronan Lamy diff --git a/Misc/NEWS.d/next/Library/2025-06-28-11-32-57.gh-issue-134759.AjjKcG.rst b/Misc/NEWS.d/next/Library/2025-06-28-11-32-57.gh-issue-134759.AjjKcG.rst new file mode 100644 index 00000000000000..79b85320926954 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-28-11-32-57.gh-issue-134759.AjjKcG.rst @@ -0,0 +1,2 @@ +Fix :exc:`UnboundLocalError` in :func:`email.message.Message.get_payload` when +the payload to decode is a :class:`bytes` object. Patch by Kliment Lamonov. From 5e1e21dee35b8e9066692d08033bbbdb562e2c28 Mon Sep 17 00:00:00 2001 From: Bast <52266665+bast0006@users.noreply.github.com> Date: Sat, 12 Jul 2025 06:37:52 -0700 Subject: [PATCH 867/989] gh-91153: prevent a crash in `bytearray.__setitem__(ind, ...)` when `ind.__index__` has side-effects (#132379) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_bytes.py | 35 +++++++++++++++++++ ...5-05-17-20-56-05.gh-issue-91153.afgtG2.rst | 1 + Objects/bytearrayobject.c | 8 +++-- 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-56-05.gh-issue-91153.afgtG2.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index bb0f8aa99da910..2591e7ca6ab0ec 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1899,6 +1899,8 @@ def test_repeat_after_setslice(self): self.assertEqual(b3, b'xcxcxc') def test_mutating_index(self): + # bytearray slice assignment can call into python code + # that reallocates the internal buffer # See gh-91153 class Boom: @@ -1916,6 +1918,39 @@ def __index__(self): with self.assertRaises(IndexError): self._testlimitedcapi.sequence_setitem(b, 0, Boom()) + def test_mutating_index_inbounds(self): + # gh-91153 continued + # Ensure buffer is not broken even if length is correct + + class MutatesOnIndex: + def __init__(self): + self.ba = bytearray(0x180) + + def __index__(self): + self.ba.clear() + self.new_ba = bytearray(0x180) # to catch out-of-bounds writes + self.ba.extend([0] * 0x180) # to check bounds checks + return 0 + + with self.subTest("skip_bounds_safety"): + instance = MutatesOnIndex() + instance.ba[instance] = ord("?") + self.assertEqual(instance.ba[0], ord("?"), "Assigned bytearray not altered") + self.assertEqual(instance.new_ba, bytearray(0x180), "Wrong object altered") + + with self.subTest("skip_bounds_safety_capi"): + instance = MutatesOnIndex() + instance.ba[instance] = ord("?") + self._testlimitedcapi.sequence_setitem(instance.ba, instance, ord("?")) + self.assertEqual(instance.ba[0], ord("?"), "Assigned bytearray not altered") + self.assertEqual(instance.new_ba, bytearray(0x180), "Wrong object altered") + + with self.subTest("skip_bounds_safety_slice"): + instance = MutatesOnIndex() + instance.ba[instance:1] = [ord("?")] + self.assertEqual(instance.ba[0], ord("?"), "Assigned bytearray not altered") + self.assertEqual(instance.new_ba, bytearray(0x180), "Wrong object altered") + class AssortedBytesTest(unittest.TestCase): # diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-56-05.gh-issue-91153.afgtG2.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-56-05.gh-issue-91153.afgtG2.rst new file mode 100644 index 00000000000000..dc2f1e22ba5b05 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-17-20-56-05.gh-issue-91153.afgtG2.rst @@ -0,0 +1 @@ +Fix a crash when a :class:`bytearray` is concurrently mutated during item assignment. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index b5d5ca9178ebdb..bf30c06af5d8fa 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -709,7 +709,9 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyByteArrayObject *self = _PyByteArray_CAST(op); Py_ssize_t start, stop, step, slicelen; - char *buf = PyByteArray_AS_STRING(self); + // Do not store a reference to the internal buffer since + // index.__index__() or _getbytevalue() may alter 'self'. + // See https://github.com/python/cpython/issues/91153. if (_PyIndex_Check(index)) { Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError); @@ -744,7 +746,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value } else { assert(0 <= ival && ival < 256); - buf[i] = (char)ival; + PyByteArray_AS_STRING(self)[i] = (char)ival; return 0; } } @@ -805,6 +807,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value /* Delete slice */ size_t cur; Py_ssize_t i; + char *buf = PyByteArray_AS_STRING(self); if (!_canresize(self)) return -1; @@ -845,6 +848,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value /* Assign slice */ Py_ssize_t i; size_t cur; + char *buf = PyByteArray_AS_STRING(self); if (needed != slicelen) { PyErr_Format(PyExc_ValueError, From be2c3d284ecce67474a260b8c37e2f1e0628a9cf Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Jul 2025 18:54:26 +0300 Subject: [PATCH 868/989] gh-136549: Fix signature of threading.excepthook() (GH-136559) --- Lib/test/test_inspect/test_inspect.py | 1 + .../next/Library/2025-07-11-23-04-39.gh-issue-136549.oAi8u4.rst | 1 + Modules/_threadmodule.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-11-23-04-39.gh-issue-136549.oAi8u4.rst diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 0ea029b977b3fc..4f3983d83c7c06 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -5916,6 +5916,7 @@ def test_sysconfig_module_has_signatures(self): def test_threading_module_has_signatures(self): import threading self._test_module_has_signatures(threading) + self.assertIsNotNone(inspect.signature(threading.__excepthook__)) def test_thread_module_has_signatures(self): import _thread diff --git a/Misc/NEWS.d/next/Library/2025-07-11-23-04-39.gh-issue-136549.oAi8u4.rst b/Misc/NEWS.d/next/Library/2025-07-11-23-04-39.gh-issue-136549.oAi8u4.rst new file mode 100644 index 00000000000000..f3050ad5d5aa10 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-11-23-04-39.gh-issue-136549.oAi8u4.rst @@ -0,0 +1 @@ +Fix signature of :func:`threading.excepthook`. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 8886a9d6bd0c8d..3540fead8e8e74 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2330,7 +2330,7 @@ thread_excepthook(PyObject *module, PyObject *args) } PyDoc_STRVAR(excepthook_doc, -"_excepthook($module, (exc_type, exc_value, exc_traceback, thread), /)\n\ +"_excepthook($module, args, /)\n\ --\n\ \n\ Handle uncaught Thread.run() exception."); From 9be3649f5eccfbda1b3c9c3195927951a9ae9b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 12 Jul 2025 18:33:07 +0200 Subject: [PATCH 869/989] gh-136591: avoid using deprecated features for OpenSSL 3.0+ (#136592) Since OpenSSL 3.0, `ERR_func_error_string()` always returns NULL and `EVP_MD_CTX_get0_md()` should be preferred over `EVP_MD_CTX_md()`. --- ...-07-12-18-05-37.gh-issue-136591.ujXmSN.rst | 3 +++ Modules/_hashopenssl.c | 21 +++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-12-18-05-37.gh-issue-136591.ujXmSN.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-12-18-05-37.gh-issue-136591.ujXmSN.rst b/Misc/NEWS.d/next/Library/2025-07-12-18-05-37.gh-issue-136591.ujXmSN.rst new file mode 100644 index 00000000000000..ccd5bf11f0467a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-12-18-05-37.gh-issue-136591.ujXmSN.rst @@ -0,0 +1,3 @@ +:mod:`!_hashlib`: avoid using deprecated functions +:manpage:`ERR_func_error_string` and :manpage:`EVP_MD_CTX_md` when using +OpenSSL 3.0 and later. Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 90a7391ebb09af..1a6c831e48377b 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -64,11 +64,15 @@ #define PY_EVP_MD_fetch(algorithm, properties) EVP_MD_fetch(NULL, algorithm, properties) #define PY_EVP_MD_up_ref(md) EVP_MD_up_ref(md) #define PY_EVP_MD_free(md) EVP_MD_free(md) + +#define PY_EVP_MD_CTX_md(CTX) EVP_MD_CTX_get0_md(CTX) #else #define PY_EVP_MD const EVP_MD #define PY_EVP_MD_fetch(algorithm, properties) EVP_get_digestbyname(algorithm) #define PY_EVP_MD_up_ref(md) do {} while(0) #define PY_EVP_MD_free(md) do {} while(0) + +#define PY_EVP_MD_CTX_md(CTX) EVP_MD_CTX_md(CTX) #endif /* hash alias map and fast lookup @@ -308,6 +312,14 @@ class _hashlib.HMAC "HMACobject *" "&PyType_Type" /* LCOV_EXCL_START */ +/* Thin wrapper around ERR_reason_error_string() returning non-NULL text. */ +static const char * +py_wrapper_ERR_reason_error_string(unsigned long errcode) +{ + const char *reason = ERR_reason_error_string(errcode); + return reason ? reason : "no reason"; +} + /* Set an exception of given type using the given OpenSSL error code. */ static void set_ssl_exception_from_errcode(PyObject *exc_type, unsigned long errcode) @@ -317,8 +329,13 @@ set_ssl_exception_from_errcode(PyObject *exc_type, unsigned long errcode) /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */ const char *lib = ERR_lib_error_string(errcode); +#ifdef Py_HAS_OPENSSL3_SUPPORT + // Since OpenSSL 3.0, ERR_func_error_string() always returns NULL. + const char *func = NULL; +#else const char *func = ERR_func_error_string(errcode); - const char *reason = ERR_reason_error_string(errcode); +#endif + const char *reason = py_wrapper_ERR_reason_error_string(errcode); if (lib && func) { PyErr_Format(exc_type, "[%s: %s] %s", lib, func, reason); @@ -838,7 +855,7 @@ static PyObject * _hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure)) { HASHobject *self = HASHobject_CAST(op); - const EVP_MD *md = EVP_MD_CTX_md(self->ctx); + const EVP_MD *md = PY_EVP_MD_CTX_md(self->ctx); if (md == NULL) { notify_ssl_error_occurred("missing EVP_MD for HASH context"); return NULL; From 47b01da4ccedd9c00fad4325b3e87d7732abeb6d Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Sun, 13 Jul 2025 02:15:04 +0800 Subject: [PATCH 870/989] gh-101100: Fix sphinx warnings in Doc/library/platform.rst (GH-136562) --- Doc/library/platform.rst | 4 ++-- Doc/tools/.nitignore | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index 06de152a742f28..37df13f8a1eb8c 100644 --- a/Doc/library/platform.rst +++ b/Doc/library/platform.rst @@ -176,8 +176,8 @@ Cross platform :attr:`processor` is resolved late, on demand. Note: the first two attribute names differ from the names presented by - :func:`os.uname`, where they are named :attr:`sysname` and - :attr:`nodename`. + :func:`os.uname`, where they are named :attr:`!sysname` and + :attr:`!nodename`. Entries which cannot be determined are set to ``''``. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 1fbb45ecd73801..510225afab8933 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -26,7 +26,6 @@ Doc/library/multiprocessing.rst Doc/library/optparse.rst Doc/library/os.rst Doc/library/pickletools.rst -Doc/library/platform.rst Doc/library/profile.rst Doc/library/pyexpat.rst Doc/library/resource.rst From 42b251bcebd749eceeb62389e413a3be37cff343 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 13 Jul 2025 07:35:57 +0200 Subject: [PATCH 871/989] gh-134939: Correct `concurrent.interpreters` source code link (#136564) --- Doc/library/concurrent.interpreters.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/concurrent.interpreters.rst b/Doc/library/concurrent.interpreters.rst index 524d505bcf144f..be9d565f8e0d38 100644 --- a/Doc/library/concurrent.interpreters.rst +++ b/Doc/library/concurrent.interpreters.rst @@ -9,7 +9,7 @@ .. versionadded:: 3.14 -**Source code:** :source:`Lib/concurrent/interpreters.py` +**Source code:** :source:`Lib/concurrent/interpreters` -------------- From 171de05b4884d1353044417ea51a4efcb55ba633 Mon Sep 17 00:00:00 2001 From: Sachin Shah <39803835+inventshah@users.noreply.github.com> Date: Sun, 13 Jul 2025 01:49:12 -0400 Subject: [PATCH 872/989] gh-136523: Fix wave.Wave_write emitting an unraisable when open raises (GH-136529) --- Lib/test/test_wave.py | 9 +++++++++ Lib/wave.py | 2 ++ .../2025-07-11-03-39-15.gh-issue-136523.s7caKL.rst | 1 + 3 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-11-03-39-15.gh-issue-136523.s7caKL.rst diff --git a/Lib/test/test_wave.py b/Lib/test/test_wave.py index 6c3362857fc2ba..226b1aa84bd73c 100644 --- a/Lib/test/test_wave.py +++ b/Lib/test/test_wave.py @@ -2,6 +2,7 @@ from test import audiotests from test import support import io +import os import struct import sys import wave @@ -196,6 +197,14 @@ def test_read_wrong_sample_width(self): with self.assertRaisesRegex(wave.Error, 'bad sample width'): wave.open(io.BytesIO(b)) + def test_open_in_write_raises(self): + # gh-136523: Wave_write.__del__ should not throw + with support.catch_unraisable_exception() as cm: + with self.assertRaises(OSError): + wave.open(os.curdir, "wb") + support.gc_collect() + self.assertIsNone(cm.unraisable) + if __name__ == '__main__': unittest.main() diff --git a/Lib/wave.py b/Lib/wave.py index 929609fa52409d..5af745e2217ec3 100644 --- a/Lib/wave.py +++ b/Lib/wave.py @@ -427,6 +427,8 @@ class Wave_write: _datawritten -- the size of the audio samples actually written """ + _file = None + def __init__(self, f): self._i_opened_the_file = None if isinstance(f, str): diff --git a/Misc/NEWS.d/next/Library/2025-07-11-03-39-15.gh-issue-136523.s7caKL.rst b/Misc/NEWS.d/next/Library/2025-07-11-03-39-15.gh-issue-136523.s7caKL.rst new file mode 100644 index 00000000000000..71ec66a37ef4c3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-11-03-39-15.gh-issue-136523.s7caKL.rst @@ -0,0 +1 @@ +Fix :class:`wave.Wave_write` emitting an unraisable when open raises. From 46707d241a9eef4713450fa3ccbe490e48c8b722 Mon Sep 17 00:00:00 2001 From: Ajay Kamdar <140011370+ogbiggles@users.noreply.github.com> Date: Sun, 13 Jul 2025 01:49:44 -0400 Subject: [PATCH 873/989] gh-132969: update ACKS file (gh-133222) (#136144) --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index 817b21259774df..3814509aea030a 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -932,6 +932,7 @@ Bob Kahn Kurt B. Kaiser Tamito Kajiyama Jan Kaliszewski +Ajay Kamdar Peter van Kampen Jan Kanis Rafe Kaplan From 609d5adc7cc241da8fe314a64ddd2c8a883ee8b7 Mon Sep 17 00:00:00 2001 From: Yongzi Li <204532581+Yzi-Li@users.noreply.github.com> Date: Sun, 13 Jul 2025 13:56:31 +0800 Subject: [PATCH 874/989] gh-134833: improve docs for `del s[i:j]` in `Mutable Sequence Types` (#134834) --- Doc/library/stdtypes.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 394c302fd354b9..8e688f03eb3a87 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1223,7 +1223,9 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). | | is replaced by the contents of | | | | the iterable *t* | | +------------------------------+--------------------------------+---------------------+ -| ``del s[i:j]`` | same as ``s[i:j] = []`` | | +| ``del s[i:j]`` | removes the elements of | | +| | ``s[i:j]`` from the list | | +| | (same as ``s[i:j] = []``) | | +------------------------------+--------------------------------+---------------------+ | ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` | \(1) | | | are replaced by those of *t* | | From 0d4fd10fbab2767fad3eb27639905c8885b88c89 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Sun, 13 Jul 2025 02:46:13 -0400 Subject: [PATCH 875/989] Docs: Fix and improve the `PyUnstable_Object_EnableDeferredRefcount` documentation (GH-135323) --- Doc/c-api/object.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 21fa1491b33d86..55f0d0f9fb7ff8 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -611,12 +611,13 @@ Object Protocol if supported by the runtime. In the :term:`free-threaded ` build, this allows the interpreter to avoid reference count adjustments to *obj*, which may improve multi-threaded performance. The tradeoff is - that *obj* will only be deallocated by the tracing garbage collector. + that *obj* will only be deallocated by the tracing garbage collector, and + not when the interpreter no longer has any references to it. - This function returns ``1`` if deferred reference counting is enabled on *obj* - (including when it was enabled before the call), + This function returns ``1`` if deferred reference counting is enabled on *obj*, and ``0`` if deferred reference counting is not supported or if the hint was - ignored by the runtime. This function is thread-safe, and cannot fail. + ignored by the interpreter, such as when deferred reference counting is already + enabled on *obj*. This function is thread-safe, and cannot fail. This function does nothing on builds with the :term:`GIL` enabled, which do not support deferred reference counting. This also does nothing if *obj* is not @@ -624,7 +625,8 @@ Object Protocol :c:func:`PyObject_GC_IsTracked`). This function is intended to be used soon after *obj* is created, - by the code that creates it. + by the code that creates it, such as in the object's :c:member:`~PyTypeObject.tp_new` + slot. .. versionadded:: 3.14 From 9e5cebd56d06e35faeca166813215d72f2f8906a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 10:58:15 +0200 Subject: [PATCH 876/989] gh-136547: allow to temporarily disable hash algorithms in tests (#136570) --- Lib/test/support/hashlib_helper.py | 318 +++++++++++++++++++++++++++-- Lib/test/test_support.py | 156 ++++++++++++++ 2 files changed, 461 insertions(+), 13 deletions(-) diff --git a/Lib/test/support/hashlib_helper.py b/Lib/test/support/hashlib_helper.py index 7032257b06877a..337a1e415b0de3 100644 --- a/Lib/test/support/hashlib_helper.py +++ b/Lib/test/support/hashlib_helper.py @@ -1,8 +1,13 @@ +import contextlib import functools import hashlib import importlib +import inspect import unittest +import unittest.mock +from collections import namedtuple from test.support.import_helper import import_module +from types import MappingProxyType try: import _hashlib @@ -15,6 +20,93 @@ _hmac = None +CANONICAL_DIGEST_NAMES = frozenset(( + 'md5', 'sha1', + 'sha224', 'sha256', 'sha384', 'sha512', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256', + 'blake2s', 'blake2b', +)) + +NON_HMAC_DIGEST_NAMES = frozenset({ + 'shake_128', 'shake_256', + 'blake2s', 'blake2b', +}) + + +class HashAPI(namedtuple("HashAPI", "builtin openssl hashlib")): + + def fullname(self, typ): + match typ: + case "builtin": + return self.builtin + case "openssl": + return f"_hashlib.{self.openssl}" if self.openssl else None + case "hashlib": + return f"hashlib.{self.hashlib}" if self.hashlib else None + case _: + raise AssertionError(f"unknown type: {typ}") + + +# Mapping from a "canonical" name to a pair (HACL*, _hashlib.*, hashlib.*) +# constructors. If the constructor name is None, then this means that the +# algorithm can only be used by the "agile" new() interfaces. +_EXPLICIT_CONSTRUCTORS = MappingProxyType({ + "md5": HashAPI("_md5.md5", "openssl_md5", "md5"), + "sha1": HashAPI("_sha1.sha1", "openssl_sha1", "sha1"), + "sha224": HashAPI("_sha2.sha224", "openssl_sha224", "sha224"), + "sha256": HashAPI("_sha2.sha256", "openssl_sha256", "sha256"), + "sha384": HashAPI("_sha2.sha384", "openssl_sha384", "sha384"), + "sha512": HashAPI("_sha2.sha512", "openssl_sha512", "sha512"), + "sha3_224": HashAPI("_sha3.sha3_224", "openssl_sha3_224", "sha3_224"), + "sha3_256": HashAPI("_sha3.sha3_256", "openssl_sha3_256", "sha3_256"), + "sha3_384": HashAPI("_sha3.sha3_384", "openssl_sha3_384", "sha3_384"), + "sha3_512": HashAPI("_sha3.sha3_512", "openssl_sha3_512", "sha3_512"), + "shake_128": HashAPI("_sha3.shake_128", "openssl_shake_128", "shake_128"), + "shake_256": HashAPI("_sha3.shake_256", "openssl_shake_256", "shake_256"), + "blake2s": HashAPI("_blake2.blake2s", None, "blake2s"), + "blake2b": HashAPI("_blake2.blake2b", None, "blake2b"), +}) +assert _EXPLICIT_CONSTRUCTORS.keys() == CANONICAL_DIGEST_NAMES + +_EXPLICIT_HMAC_CONSTRUCTORS = { + name: f'_hmac.compute_{name}' for name in ( + 'md5', 'sha1', + 'sha224', 'sha256', 'sha384', 'sha512', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + ) +} +_EXPLICIT_HMAC_CONSTRUCTORS['shake_128'] = None +_EXPLICIT_HMAC_CONSTRUCTORS['shake_256'] = None +# Strictly speaking, HMAC-BLAKE is meaningless as BLAKE2 is already a +# keyed hash function. However, as it's exposed by HACL*, we test it. +_EXPLICIT_HMAC_CONSTRUCTORS['blake2s'] = '_hmac.compute_blake2s_32' +_EXPLICIT_HMAC_CONSTRUCTORS['blake2b'] = '_hmac.compute_blake2b_32' +_EXPLICIT_HMAC_CONSTRUCTORS = MappingProxyType(_EXPLICIT_HMAC_CONSTRUCTORS) +assert _EXPLICIT_HMAC_CONSTRUCTORS.keys() == CANONICAL_DIGEST_NAMES + + +def _ensure_wrapper_signature(wrapper, wrapped): + """Ensure that a wrapper has the same signature as the wrapped function. + + This is used to guarantee that a TypeError raised due to a bad API call + is raised consistently (using variadic signatures would hide such errors). + """ + try: + wrapped_sig = inspect.signature(wrapped) + except ValueError: # built-in signature cannot be found + return + + wrapper_sig = inspect.signature(wrapper) + if wrapped_sig != wrapper_sig: + fullname = f"{wrapped.__module__}.{wrapped.__qualname__}" + raise AssertionError( + f"signature for {fullname}() is incorrect:\n" + f" expect: {wrapped_sig}\n" + f" actual: {wrapper_sig}" + ) + + def requires_hashlib(): return unittest.skipIf(_hashlib is None, "requires _hashlib") @@ -30,6 +122,7 @@ def _missing_hash(digestname, implementation=None, *, exc=None): def _openssl_availabillity(digestname, *, usedforsecurity): + assert isinstance(digestname, str), digestname try: _hashlib.new(digestname, usedforsecurity=usedforsecurity) except AttributeError: @@ -74,6 +167,7 @@ def requires_hashdigest(digestname, openssl=None, usedforsecurity=True): ValueError: [digital envelope routines: EVP_DigestInit_ex] disabled for FIPS ValueError: unsupported hash type md4 """ + assert isinstance(digestname, str), digestname if openssl and _hashlib is not None: def test_availability(): _hashlib.new(digestname, usedforsecurity=usedforsecurity) @@ -101,6 +195,7 @@ def requires_openssl_hashdigest(digestname, *, usedforsecurity=True): The hashing algorithm may be missing or blocked by a strict crypto policy. """ + assert isinstance(digestname, str), digestname def decorator_func(func): @requires_hashlib() # avoid checking at each call @functools.wraps(func) @@ -131,6 +226,7 @@ def requires_builtin_hashdigest( - The *module_name* is the C extension module name based on HACL*. - The *digestname* is one of its member, e.g., 'md5'. """ + assert isinstance(digestname, str), digestname def decorator_func(func): @functools.wraps(func) def wrapper(*args, **kwargs): @@ -156,6 +252,7 @@ def find_builtin_hashdigest_constructor( - The *module_name* is the C extension module name based on HACL*. - The *digestname* is one of its member, e.g., 'md5'. """ + assert isinstance(digestname, str), digestname module = import_module(module_name) try: constructor = getattr(module, digestname) @@ -178,7 +275,7 @@ class HashFunctionsTrait: implementation of HMAC). """ - ALGORITHMS = [ + DIGEST_NAMES = [ 'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', @@ -187,10 +284,18 @@ class HashFunctionsTrait: # Default 'usedforsecurity' to use when looking up a hash function. usedforsecurity = True - def _find_constructor(self, name): + @classmethod + def setUpClass(cls): + super().setUpClass() + assert CANONICAL_DIGEST_NAMES.issuperset(cls.DIGEST_NAMES) + + def is_valid_digest_name(self, digestname): + self.assertIn(digestname, self.DIGEST_NAMES) + + def _find_constructor(self, digestname): # By default, a missing algorithm skips the test that uses it. - self.assertIn(name, self.ALGORITHMS) - self.skipTest(f"missing hash function: {name}") + self.is_valid_digest_name(digestname) + self.skipTest(f"missing hash function: {digestname}") @property def md5(self): @@ -239,9 +344,9 @@ class NamedHashFunctionsTrait(HashFunctionsTrait): Hash functions are available if and only if they are available in hashlib. """ - def _find_constructor(self, name): - self.assertIn(name, self.ALGORITHMS) - return name + def _find_constructor(self, digestname): + self.is_valid_digest_name(digestname) + return digestname class OpenSSLHashFunctionsTrait(HashFunctionsTrait): @@ -250,10 +355,10 @@ class OpenSSLHashFunctionsTrait(HashFunctionsTrait): Hash functions are available if and only if they are available in _hashlib. """ - def _find_constructor(self, name): - self.assertIn(name, self.ALGORITHMS) + def _find_constructor(self, digestname): + self.is_valid_digest_name(digestname) return find_openssl_hashdigest_constructor( - name, usedforsecurity=self.usedforsecurity + digestname, usedforsecurity=self.usedforsecurity ) @@ -265,9 +370,9 @@ class BuiltinHashFunctionsTrait(HashFunctionsTrait): is not since the former is unconditionally built. """ - def _find_constructor_in(self, module, name): - self.assertIn(name, self.ALGORITHMS) - return find_builtin_hashdigest_constructor(module, name) + def _find_constructor_in(self, module, digestname): + self.is_valid_digest_name(digestname) + return find_builtin_hashdigest_constructor(module, digestname) @property def md5(self): @@ -327,3 +432,190 @@ def find_gil_minsize(modules_names, default=2048): continue sizes.append(getattr(module, '_GIL_MINSIZE', default)) return max(sizes, default=default) + + +def _block_openssl_hash_new(blocked_name): + """Block OpenSSL implementation of _hashlib.new().""" + assert isinstance(blocked_name, str), blocked_name + if _hashlib is None: + return contextlib.nullcontext() + @functools.wraps(wrapped := _hashlib.new) + def wrapper(name, data=b'', *, usedforsecurity=True, string=None): + if name == blocked_name: + raise _hashlib.UnsupportedDigestmodError(blocked_name) + return wrapped(*args, **kwargs) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch('_hashlib.new', wrapper) + + +def _block_openssl_hmac_new(blocked_name): + """Block OpenSSL HMAC-HASH implementation.""" + assert isinstance(blocked_name, str), blocked_name + if _hashlib is None: + return contextlib.nullcontext() + @functools.wraps(wrapped := _hashlib.hmac_new) + def wrapper(key, msg=b'', digestmod=None): + if digestmod == blocked_name: + raise _hashlib.UnsupportedDigestmodError(blocked_name) + return wrapped(key, msg, digestmod) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch('_hashlib.hmac_new', wrapper) + + +def _block_openssl_hmac_digest(blocked_name): + """Block OpenSSL HMAC-HASH one-shot digest implementation.""" + assert isinstance(blocked_name, str), blocked_name + if _hashlib is None: + return contextlib.nullcontext() + @functools.wraps(wrapped := _hashlib.hmac_digest) + def wrapper(key, msg, digest): + if digest == blocked_name: + raise _hashlib.UnsupportedDigestmodError(blocked_name) + return wrapped(key, msg, digestmod) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch('_hashlib.hmac_digest', wrapper) + + +@contextlib.contextmanager +def _block_builtin_hash_new(name): + assert isinstance(name, str), name + assert name.lower() == name, f"invalid name: {name}" + + builtin_cache = getattr(hashlib, '__builtin_constructor_cache') + if name in builtin_cache: + f = builtin_cache.pop(name) + F = builtin_cache.pop(name.upper(), None) + else: + f = F = None + try: + yield + finally: + if f is not None: + builtin_cache[name] = f + if F is not None: + builtin_cache[name.upper()] = F + + +def _block_builtin_hmac_new(blocked_name): + assert isinstance(blocked_name, str), blocked_name + if _hmac is None: + return contextlib.nullcontext() + @functools.wraps(wrapped := _hmac.new) + def wrapper(key, msg=None, digestmod=None): + if digestmod == blocked_name: + raise _hmac.UnknownHashError(blocked_name) + return wrapped(key, msg, digestmod) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch('_hmac.new', wrapper) + + +def _block_builtin_hmac_digest(blocked_name): + assert isinstance(blocked_name, str), blocked_name + if _hmac is None: + return contextlib.nullcontext() + @functools.wraps(wrapped := _hmac.compute_digest) + def wrapper(key, msg, digest): + if digest == blocked_name: + raise _hmac.UnknownHashError(blocked_name) + return wrapped(key, msg, digest) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch('_hmac.compute_digest', wrapper) + + +def _make_hash_constructor_blocker(name, dummy, *, interface): + assert isinstance(name, str), name + assert interface in ('builtin', 'openssl', 'hashlib') + assert name in _EXPLICIT_CONSTRUCTORS, f"invalid hash: {name}" + fullname = _EXPLICIT_CONSTRUCTORS[name].fullname(interface) + if fullname is None: + # function shouldn't exist for this implementation + return contextlib.nullcontext() + assert fullname.count('.') == 1, fullname + module_name, method = fullname.split('.', maxsplit=1) + try: + module = importlib.import_module(module_name) + except ImportError: + # module is already disabled + return contextlib.nullcontext() + wrapped = getattr(module, method) + wrapper = functools.wraps(wrapped)(dummy) + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch(fullname, wrapper) + + +def _block_hashlib_hash_constructor(name): + """Block explicit public constructors.""" + assert isinstance(name, str), name + def dummy(data=b'', *, usedforsecurity=True, string=None): + raise ValueError(f"unsupported hash name: {name}") + return _make_hash_constructor_blocker(name, dummy, interface='hashlib') + + +def _block_openssl_hash_constructor(name): + """Block explicit OpenSSL constructors.""" + assert isinstance(name, str), name + def dummy(data=b'', *, usedforsecurity=True, string=None): + raise ValueError(f"unsupported hash name: {name}") + return _make_hash_constructor_blocker(name, dummy, interface='openssl') + + +def _block_builtin_hash_constructor(name): + """Block explicit HACL* constructors.""" + assert isinstance(name, str), name + def dummy(data=b'', *, usedforsecurity=True, string=b''): + raise ValueError(f"unsupported hash name: {name}") + return _make_hash_constructor_blocker(name, dummy, interface='builtin') + + +def _block_builtin_hmac_constructor(name): + """Block explicit HACL* HMAC constructors.""" + assert isinstance(name, str), name + assert name in _EXPLICIT_HMAC_CONSTRUCTORS, f"invalid hash: {name}" + fullname = _EXPLICIT_HMAC_CONSTRUCTORS[name] + if fullname is None: + # function shouldn't exist for this implementation + return contextlib.nullcontext() + assert fullname.count('.') == 1, fullname + module_name, method = fullname.split('.', maxsplit=1) + assert module_name == '_hmac', module_name + try: + module = importlib.import_module(module_name) + except ImportError: + # module is already disabled + return contextlib.nullcontext() + @functools.wraps(wrapped := getattr(module, method)) + def wrapper(key, obj): + raise ValueError(f"unsupported hash name: {name}") + _ensure_wrapper_signature(wrapper, wrapped) + return unittest.mock.patch(fullname, wrapper) + + +@contextlib.contextmanager +def block_algorithm(name, *, allow_openssl=False, allow_builtin=False): + """Block a hash algorithm for both hashing and HMAC. + + Be careful with this helper as a function may be allowed, but can + still raise a ValueError at runtime if the OpenSSL security policy + disables it, e.g., if allow_openssl=True and FIPS mode is on. + """ + with contextlib.ExitStack() as stack: + if not (allow_openssl or allow_builtin): + # If one of the private interface is allowed, then the + # public interface will fallback to it even though the + # comment in hashlib.py says otherwise. + # + # So we should only block it if the private interfaces + # are blocked as well. + stack.enter_context(_block_hashlib_hash_constructor(name)) + if not allow_openssl: + stack.enter_context(_block_openssl_hash_new(name)) + stack.enter_context(_block_openssl_hmac_new(name)) + stack.enter_context(_block_openssl_hmac_digest(name)) + stack.enter_context(_block_openssl_hash_constructor(name)) + if not allow_builtin: + stack.enter_context(_block_builtin_hash_new(name)) + stack.enter_context(_block_builtin_hmac_new(name)) + stack.enter_context(_block_builtin_hmac_digest(name)) + stack.enter_context(_block_builtin_hash_constructor(name)) + stack.enter_context(_block_builtin_hmac_constructor(name)) + yield diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index e48a2464ee5977..cb31122fee9642 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -1,6 +1,7 @@ import contextlib import errno import importlib +import itertools import io import logging import os @@ -17,6 +18,7 @@ import warnings from test import support +from test.support import hashlib_helper from test.support import import_helper from test.support import os_helper from test.support import script_helper @@ -818,5 +820,159 @@ def test_linked_to_musl(self): # SuppressCrashReport +class TestHashlibSupport(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.hashlib = import_helper.import_module("hashlib") + cls.hmac = import_helper.import_module("hmac") + + # We required the extension modules to be present since blocking + # HACL* implementations while allowing OpenSSL ones would still + # result in failures. + cls._hashlib = import_helper.import_module("_hashlib") + cls._hmac = import_helper.import_module("_hmac") + + def check_context(self, disabled=True): + if disabled: + return self.assertRaises(ValueError) + return contextlib.nullcontext() + + def try_import_attribute(self, fullname, default=None): + if fullname is None: + return default + assert fullname.count('.') == 1, fullname + module_name, attribute = fullname.split('.', maxsplit=1) + try: + module = importlib.import_module(module_name) + except ImportError: + return default + try: + return getattr(module, attribute, default) + except TypeError: + return default + + def validate_modules(self): + if hasattr(hashlib_helper, 'hashlib'): + self.assertIs(hashlib_helper.hashlib, self.hashlib) + if hasattr(hashlib_helper, 'hmac'): + self.assertIs(hashlib_helper.hmac, self.hmac) + + def fetch_hash_function(self, name, typ): + entry = hashlib_helper._EXPLICIT_CONSTRUCTORS[name] + match typ: + case "hashlib": + assert entry.hashlib is not None, entry + return getattr(self.hashlib, entry.hashlib) + case "openssl": + try: + return getattr(self._hashlib, entry.openssl, None) + except TypeError: + return None + case "builtin": + return self.try_import_attribute(entry.fullname(typ)) + + def fetch_hmac_function(self, name): + fullname = hashlib_helper._EXPLICIT_HMAC_CONSTRUCTORS[name] + return self.try_import_attribute(fullname) + + def check_openssl_hash(self, name, *, disabled=True): + """Check that OpenSSL HASH interface is enabled/disabled.""" + with self.check_context(disabled): + _ = self._hashlib.new(name) + if do_hash := self.fetch_hash_function(name, "openssl"): + self.assertStartsWith(do_hash.__name__, 'openssl_') + with self.check_context(disabled): + _ = do_hash(b"") + + def check_openssl_hmac(self, name, *, disabled=True): + """Check that OpenSSL HMAC interface is enabled/disabled.""" + if name in hashlib_helper.NON_HMAC_DIGEST_NAMES: + # HMAC-BLAKE and HMAC-SHAKE raise a ValueError as they are not + # supported at all (they do not make any sense in practice). + with self.assertRaises(ValueError): + self._hashlib.hmac_digest(b"", b"", name) + else: + with self.check_context(disabled): + _ = self._hashlib.hmac_digest(b"", b"", name) + # OpenSSL does not provide one-shot explicit HMAC functions + + def check_builtin_hash(self, name, *, disabled=True): + """Check that HACL* HASH interface is enabled/disabled.""" + if do_hash := self.fetch_hash_function(name, "builtin"): + self.assertEqual(do_hash.__name__, name) + with self.check_context(disabled): + _ = do_hash(b"") + + def check_builtin_hmac(self, name, *, disabled=True): + """Check that HACL* HMAC interface is enabled/disabled.""" + if name in hashlib_helper.NON_HMAC_DIGEST_NAMES: + # HMAC-BLAKE and HMAC-SHAKE raise a ValueError as they are not + # supported at all (they do not make any sense in practice). + with self.assertRaises(ValueError): + self._hmac.compute_digest(b"", b"", name) + else: + with self.check_context(disabled): + _ = self._hmac.compute_digest(b"", b"", name) + + with self.check_context(disabled): + _ = self._hmac.new(b"", b"", name) + + if do_hmac := self.fetch_hmac_function(name): + self.assertStartsWith(do_hmac.__name__, 'compute_') + with self.check_context(disabled): + _ = do_hmac(b"", b"") + else: + self.assertIn(name, hashlib_helper.NON_HMAC_DIGEST_NAMES) + + @support.subTests( + ('name', 'allow_openssl', 'allow_builtin'), + itertools.product( + hashlib_helper.CANONICAL_DIGEST_NAMES, + [True, False], + [True, False], + ) + ) + def test_disable_hash(self, name, allow_openssl, allow_builtin): + # In FIPS mode, the function may be available but would still need + # to raise a ValueError. For simplicity, we don't test the helper + # when we're in FIPS mode. + if self._hashlib.get_fips_mode(): + self.skipTest("hash functions may still be blocked in FIPS mode") + flags = dict(allow_openssl=allow_openssl, allow_builtin=allow_builtin) + is_simple_disabled = not allow_builtin and not allow_openssl + + with hashlib_helper.block_algorithm(name, **flags): + self.validate_modules() + + # OpenSSL's blake2s and blake2b are unknown names + # when only the OpenSSL interface is available. + if allow_openssl and not allow_builtin: + aliases = {'blake2s': 'blake2s256', 'blake2b': 'blake2b512'} + name_for_hashlib_new = aliases.get(name, name) + else: + name_for_hashlib_new = name + + with self.check_context(is_simple_disabled): + _ = self.hashlib.new(name_for_hashlib_new) + with self.check_context(is_simple_disabled): + _ = getattr(self.hashlib, name)(b"") + + self.check_openssl_hash(name, disabled=not allow_openssl) + self.check_builtin_hash(name, disabled=not allow_builtin) + + if name not in hashlib_helper.NON_HMAC_DIGEST_NAMES: + with self.check_context(is_simple_disabled): + _ = self.hmac.new(b"", b"", name) + with self.check_context(is_simple_disabled): + _ = self.hmac.HMAC(b"", b"", name) + with self.check_context(is_simple_disabled): + _ = self.hmac.digest(b"", b"", name) + + self.check_openssl_hmac(name, disabled=not allow_openssl) + self.check_builtin_hmac(name, disabled=not allow_builtin) + + if __name__ == '__main__': unittest.main() From 3dbe02ccd3eefc48ac9fa14427bb4cdb82d1ebae Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Sun, 13 Jul 2025 05:10:37 -0400 Subject: [PATCH 877/989] gh-132346: Docs: Clarify that reference counts aren't stable between versions (GH-132352) --- Doc/glossary.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 705b0a9279c6d4..199a917f9f101e 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -1208,6 +1208,11 @@ Glossary :func:`sys.getrefcount` function to return the reference count for a particular object. + In :term:`CPython`, reference counts are not considered to be stable + or well-defined values; the number of references to an object, and how + that number is affected by Python code, may be different between + versions. + regular package A traditional :term:`package`, such as a directory containing an ``__init__.py`` file. From e18829a8adb3a64ffffffbd7dcada3c3611522b0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 13 Jul 2025 12:44:54 +0300 Subject: [PATCH 878/989] gh-132629: Deprecate accepting out-of-range values for unsigned integers in PyArg_Parse (GH-132630) For unsigned integer formats in the PyArg_Parse* functions, accepting Python integers with value that is larger than the maximal value the corresponding C type or less than the minimal value for the corresponding signed integer type is now deprecated. --- Doc/c-api/arg.rst | 27 +++-- Doc/whatsnew/3.15.rst | 6 +- Lib/test/clinic.test.c | 95 ++++++++++++--- Lib/test/test_capi/test_getargs.py | 70 ++++++++--- Lib/test/test_clinic.py | 55 ++++++--- ...-04-17-12-37-27.gh-issue-132629.01ArwX.rst | 4 + Modules/clinic/_cursesmodule.c.h | 36 +++++- Modules/clinic/_testclinic.c.h | 87 +++++++++++--- Modules/clinic/_zoneinfo.c.h | 17 ++- Modules/clinic/binascii.c.h | 38 ++++-- Modules/clinic/fcntlmodule.c.h | 19 ++- Modules/clinic/posixmodule.c.h | 54 ++++++++- Modules/clinic/selectmodule.c.h | 38 ++++-- Modules/clinic/signalmodule.c.h | 19 ++- Modules/clinic/zlibmodule.c.h | 110 +++++++++++++++--- Modules/fcntlmodule.c | 4 +- PC/clinic/msvcrtmodule.c.h | 20 +++- Python/getargs.c | 83 +++++++++---- Tools/clinic/libclinic/converters.py | 103 +++++++--------- 19 files changed, 674 insertions(+), 211 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-04-17-12-37-27.gh-issue-132629.01ArwX.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index ab9f9c4539ae9a..803572afc39ae3 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -241,9 +241,11 @@ the Python object to the required type. For signed integer formats, :exc:`OverflowError` is raised if the value is out of range for the C type. -For unsigned integer formats, no range checking is done --- the +For unsigned integer formats, the most significant bits are silently truncated when the receiving field is too -small to receive the value. +small to receive the value, and :exc:`DeprecationWarning` is emitted when +the value is larger than the maximal value for the C type or less than +the minimal value for the corresponding signed integer type of the same size. ``b`` (:class:`int`) [unsigned char] Convert a nonnegative Python integer to an unsigned tiny integer, stored in a C @@ -252,27 +254,25 @@ small to receive the value. ``B`` (:class:`int`) [unsigned char] Convert a Python integer to a tiny integer without overflow checking, stored in a C :c:expr:`unsigned char`. + Convert a Python integer to a C :c:expr:`unsigned char`. ``h`` (:class:`int`) [short int] Convert a Python integer to a C :c:expr:`short int`. ``H`` (:class:`int`) [unsigned short int] - Convert a Python integer to a C :c:expr:`unsigned short int`, without overflow - checking. + Convert a Python integer to a C :c:expr:`unsigned short int`. ``i`` (:class:`int`) [int] Convert a Python integer to a plain C :c:expr:`int`. ``I`` (:class:`int`) [unsigned int] - Convert a Python integer to a C :c:expr:`unsigned int`, without overflow - checking. + Convert a Python integer to a C :c:expr:`unsigned int`. ``l`` (:class:`int`) [long int] Convert a Python integer to a C :c:expr:`long int`. ``k`` (:class:`int`) [unsigned long] - Convert a Python integer to a C :c:expr:`unsigned long` without - overflow checking. + Convert a Python integer to a C :c:expr:`unsigned long`. .. versionchanged:: 3.14 Use :meth:`~object.__index__` if available. @@ -281,8 +281,7 @@ small to receive the value. Convert a Python integer to a C :c:expr:`long long`. ``K`` (:class:`int`) [unsigned long long] - Convert a Python integer to a C :c:expr:`unsigned long long` - without overflow checking. + Convert a Python integer to a C :c:expr:`unsigned long long`. .. versionchanged:: 3.14 Use :meth:`~object.__index__` if available. @@ -310,6 +309,14 @@ small to receive the value. ``D`` (:class:`complex`) [Py_complex] Convert a Python complex number to a C :c:type:`Py_complex` structure. +.. deprecated:: next + + For unsigned integer formats ``B``, ``H``, ``I``, ``k`` and ``K``, + :exc:`DeprecationWarning` is emitted when the value is larger than + the maximal value for the C type or less than the minimal value for + the corresponding signed integer type of the same size. + + Other objects ------------- diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 010abb7d9b9278..2f713fbb888c30 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -462,7 +462,11 @@ Porting to Python 3.15 Deprecated C APIs ----------------- -* TODO +* For unsigned integer formats in :c:func:`PyArg_ParseTuple`, + accepting Python integers with value that is larger than the maximal value + for the C type or less than the minimal value for the corresponding + signed integer type of the same size is now deprecated. + (Contributed by Serhiy Storchaka in :gh:`132629`.) .. Add C API deprecations above alphabetically, not here at the end. diff --git a/Lib/test/clinic.test.c b/Lib/test/clinic.test.c index 4a67fcd2c3e9b3..dc5b4b27a07f99 100644 --- a/Lib/test/clinic.test.c +++ b/Lib/test/clinic.test.c @@ -1020,12 +1020,19 @@ test_unsigned_char_converter(PyObject *module, PyObject *const *args, Py_ssize_t goto skip_optional; } { - unsigned long ival = PyLong_AsUnsignedLongMask(args[2]); - if (ival == (unsigned long)-1 && PyErr_Occurred()) { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned char), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { goto exit; } - else { - c = (unsigned char) ival; + if ((size_t)_bytes > sizeof(unsigned char)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } } } skip_optional: @@ -1038,7 +1045,7 @@ test_unsigned_char_converter(PyObject *module, PyObject *const *args, Py_ssize_t static PyObject * test_unsigned_char_converter_impl(PyObject *module, unsigned char a, unsigned char b, unsigned char c) -/*[clinic end generated code: output=45920dbedc22eb55 input=021414060993e289]*/ +/*[clinic end generated code: output=49eda9faaf53372a input=021414060993e289]*/ /*[clinic input] @@ -1151,9 +1158,21 @@ test_unsigned_short_converter(PyObject *module, PyObject *const *args, Py_ssize_ if (nargs < 3) { goto skip_optional; } - c = (unsigned short)PyLong_AsUnsignedLongMask(args[2]); - if (c == (unsigned short)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned short), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned short)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: return_value = test_unsigned_short_converter_impl(module, a, b, c); @@ -1165,7 +1184,7 @@ test_unsigned_short_converter(PyObject *module, PyObject *const *args, Py_ssize_ static PyObject * test_unsigned_short_converter_impl(PyObject *module, unsigned short a, unsigned short b, unsigned short c) -/*[clinic end generated code: output=e6e990df729114fc input=cdfd8eff3d9176b4]*/ +/*[clinic end generated code: output=f591c7797e150f49 input=cdfd8eff3d9176b4]*/ /*[clinic input] @@ -1298,9 +1317,21 @@ test_unsigned_int_converter(PyObject *module, PyObject *const *args, Py_ssize_t if (nargs < 3) { goto skip_optional; } - c = (unsigned int)PyLong_AsUnsignedLongMask(args[2]); - if (c == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: return_value = test_unsigned_int_converter_impl(module, a, b, c); @@ -1312,7 +1343,7 @@ test_unsigned_int_converter(PyObject *module, PyObject *const *args, Py_ssize_t static PyObject * test_unsigned_int_converter_impl(PyObject *module, unsigned int a, unsigned int b, unsigned int c) -/*[clinic end generated code: output=f9cdbe410ccc98a3 input=5533534828b62fc0]*/ +/*[clinic end generated code: output=50a413f1cc82dc11 input=5533534828b62fc0]*/ /*[clinic input] @@ -1414,7 +1445,22 @@ test_unsigned_long_converter(PyObject *module, PyObject *const *args, Py_ssize_t _PyArg_BadArgument("test_unsigned_long_converter", "argument 3", "int", args[2]); goto exit; } - c = PyLong_AsUnsignedLongMask(args[2]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } skip_optional: return_value = test_unsigned_long_converter_impl(module, a, b, c); @@ -1425,7 +1471,7 @@ test_unsigned_long_converter(PyObject *module, PyObject *const *args, Py_ssize_t static PyObject * test_unsigned_long_converter_impl(PyObject *module, unsigned long a, unsigned long b, unsigned long c) -/*[clinic end generated code: output=d74eed227d77a31b input=f450d94cae1ef73b]*/ +/*[clinic end generated code: output=1bbf5620093cc914 input=f450d94cae1ef73b]*/ /*[clinic input] @@ -1529,7 +1575,22 @@ test_unsigned_long_long_converter(PyObject *module, PyObject *const *args, Py_ss _PyArg_BadArgument("test_unsigned_long_long_converter", "argument 3", "int", args[2]); goto exit; } - c = PyLong_AsUnsignedLongLongMask(args[2]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned long long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } skip_optional: return_value = test_unsigned_long_long_converter_impl(module, a, b, c); @@ -1542,7 +1603,7 @@ test_unsigned_long_long_converter_impl(PyObject *module, unsigned long long a, unsigned long long b, unsigned long long c) -/*[clinic end generated code: output=5ca4e4dfb3db644b input=a15115dc41866ff4]*/ +/*[clinic end generated code: output=582a6623dc845824 input=a15115dc41866ff4]*/ /*[clinic input] diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 67a8da7599511f..dc2000089684a6 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -48,8 +48,8 @@ LARGE = 0x7FFFFFFF VERY_LARGE = 0xFF0000121212121212121242 -from _testcapi import UCHAR_MAX, USHRT_MAX, UINT_MAX, ULONG_MAX, INT_MAX, \ - INT_MIN, LONG_MIN, LONG_MAX, PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, \ +from _testcapi import UCHAR_MAX, USHRT_MAX, UINT_MAX, ULONG_MAX, ULLONG_MAX, INT_MAX, \ + INT_MIN, LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX, PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, \ SHRT_MIN, SHRT_MAX, FLT_MIN, FLT_MAX, DBL_MIN, DBL_MAX DBL_MAX_EXP = sys.float_info.max_exp @@ -57,9 +57,8 @@ NAN = float('nan') # fake, they are not defined in Python's header files -LLONG_MAX = 2**63-1 -LLONG_MIN = -2**63 -ULLONG_MAX = 2**64-1 +SCHAR_MAX = UCHAR_MAX // 2 +SCHAR_MIN = SCHAR_MAX - UCHAR_MAX NULL = None @@ -209,10 +208,23 @@ def test_B(self): self.assertEqual(UCHAR_MAX, getargs_B(-1)) self.assertEqual(0, getargs_B(0)) self.assertEqual(UCHAR_MAX, getargs_B(UCHAR_MAX)) - self.assertEqual(0, getargs_B(UCHAR_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(0, getargs_B(UCHAR_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(1, getargs_B(-UCHAR_MAX)) + self.assertEqual(SCHAR_MAX+1, getargs_B(SCHAR_MIN)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(SCHAR_MAX, getargs_B(SCHAR_MIN-1)) + + self.assertEqual(128, getargs_B(-2**7)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(127, getargs_B(-2**7-1)) self.assertEqual(42, getargs_B(42)) - self.assertEqual(UCHAR_MAX & VERY_LARGE, getargs_B(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(UCHAR_MAX & VERY_LARGE, getargs_B(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(UCHAR_MAX & -VERY_LARGE, getargs_B(-VERY_LARGE)) def test_H(self): from _testcapi import getargs_H @@ -233,11 +245,18 @@ def test_H(self): self.assertEqual(USHRT_MAX, getargs_H(-1)) self.assertEqual(0, getargs_H(0)) self.assertEqual(USHRT_MAX, getargs_H(USHRT_MAX)) - self.assertEqual(0, getargs_H(USHRT_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(0, getargs_H(USHRT_MAX+1)) + self.assertEqual(SHRT_MAX+1, getargs_H(SHRT_MIN)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(SHRT_MAX, getargs_H(SHRT_MIN-1)) self.assertEqual(42, getargs_H(42)) - self.assertEqual(VERY_LARGE & USHRT_MAX, getargs_H(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(USHRT_MAX & VERY_LARGE, getargs_H(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(USHRT_MAX & -VERY_LARGE, getargs_H(-VERY_LARGE)) def test_I(self): from _testcapi import getargs_I @@ -258,11 +277,18 @@ def test_I(self): self.assertEqual(UINT_MAX, getargs_I(-1)) self.assertEqual(0, getargs_I(0)) self.assertEqual(UINT_MAX, getargs_I(UINT_MAX)) - self.assertEqual(0, getargs_I(UINT_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(0, getargs_I(UINT_MAX+1)) + self.assertEqual(INT_MAX+1, getargs_I(INT_MIN)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(INT_MAX, getargs_I(INT_MIN-1)) self.assertEqual(42, getargs_I(42)) - self.assertEqual(VERY_LARGE & UINT_MAX, getargs_I(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(UINT_MAX & VERY_LARGE, getargs_I(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(UINT_MAX & -VERY_LARGE, getargs_I(-VERY_LARGE)) def test_k(self): from _testcapi import getargs_k @@ -283,11 +309,18 @@ def test_k(self): self.assertEqual(ULONG_MAX, getargs_k(-1)) self.assertEqual(0, getargs_k(0)) self.assertEqual(ULONG_MAX, getargs_k(ULONG_MAX)) - self.assertEqual(0, getargs_k(ULONG_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(0, getargs_k(ULONG_MAX+1)) + self.assertEqual(LONG_MAX+1, getargs_k(LONG_MIN)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(LONG_MAX, getargs_k(LONG_MIN-1)) self.assertEqual(42, getargs_k(42)) - self.assertEqual(VERY_LARGE & ULONG_MAX, getargs_k(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ULONG_MAX & VERY_LARGE, getargs_k(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ULONG_MAX & -VERY_LARGE, getargs_k(-VERY_LARGE)) class Signed_TestCase(unittest.TestCase): def test_h(self): @@ -434,11 +467,18 @@ def test_K(self): self.assertEqual(ULLONG_MAX, getargs_K(ULLONG_MAX)) self.assertEqual(0, getargs_K(0)) self.assertEqual(ULLONG_MAX, getargs_K(ULLONG_MAX)) - self.assertEqual(0, getargs_K(ULLONG_MAX+1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(0, getargs_K(ULLONG_MAX+1)) + self.assertEqual(LLONG_MAX+1, getargs_K(LLONG_MIN)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(LLONG_MAX, getargs_K(LLONG_MIN-1)) self.assertEqual(42, getargs_K(42)) - self.assertEqual(VERY_LARGE & ULLONG_MAX, getargs_K(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ULLONG_MAX & VERY_LARGE, getargs_K(VERY_LARGE)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ULLONG_MAX & -VERY_LARGE, getargs_K(-VERY_LARGE)) class Float_TestCase(unittest.TestCase, FloatsAreIdenticalMixin): diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 580d54e0eb094d..a6ed887999242f 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -3048,6 +3048,8 @@ def test_char_converter(self): def test_unsigned_char_converter(self): from _testcapi import UCHAR_MAX + SCHAR_MAX = UCHAR_MAX // 2 + SCHAR_MIN = SCHAR_MAX - UCHAR_MAX with self.assertRaises(OverflowError): ac_tester.unsigned_char_converter(-1) with self.assertRaises(OverflowError): @@ -3057,8 +3059,13 @@ def test_unsigned_char_converter(self): with self.assertRaises(TypeError): ac_tester.unsigned_char_converter([]) self.assertEqual(ac_tester.unsigned_char_converter(), (12, 34, 56)) - self.assertEqual(ac_tester.unsigned_char_converter(0, 0, UCHAR_MAX + 1), (0, 0, 0)) - self.assertEqual(ac_tester.unsigned_char_converter(0, 0, (UCHAR_MAX + 1) * 3 + 123), (0, 0, 123)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_char_converter(0, 0, UCHAR_MAX + 1), (0, 0, 0)) + self.assertEqual(ac_tester.unsigned_char_converter(0, 0, SCHAR_MIN), (0, 0, SCHAR_MAX + 1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_char_converter(0, 0, SCHAR_MIN - 1), (0, 0, SCHAR_MAX)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_char_converter(0, 0, (UCHAR_MAX + 1) * 3 + 123), (0, 0, 123)) def test_short_converter(self): from _testcapi import SHRT_MIN, SHRT_MAX @@ -3072,7 +3079,7 @@ def test_short_converter(self): self.assertEqual(ac_tester.short_converter(4321), (4321,)) def test_unsigned_short_converter(self): - from _testcapi import USHRT_MAX + from _testcapi import SHRT_MIN, SHRT_MAX, USHRT_MAX with self.assertRaises(ValueError): ac_tester.unsigned_short_converter(-1) with self.assertRaises(OverflowError): @@ -3082,8 +3089,13 @@ def test_unsigned_short_converter(self): with self.assertRaises(TypeError): ac_tester.unsigned_short_converter([]) self.assertEqual(ac_tester.unsigned_short_converter(), (12, 34, 56)) - self.assertEqual(ac_tester.unsigned_short_converter(0, 0, USHRT_MAX + 1), (0, 0, 0)) - self.assertEqual(ac_tester.unsigned_short_converter(0, 0, (USHRT_MAX + 1) * 3 + 123), (0, 0, 123)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_short_converter(0, 0, USHRT_MAX + 1), (0, 0, 0)) + self.assertEqual(ac_tester.unsigned_short_converter(0, 0, SHRT_MIN), (0, 0, SHRT_MAX + 1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_short_converter(0, 0, SHRT_MIN - 1), (0, 0, SHRT_MAX)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_short_converter(0, 0, (USHRT_MAX + 1) * 3 + 123), (0, 0, 123)) def test_int_converter(self): from _testcapi import INT_MIN, INT_MAX @@ -3099,7 +3111,7 @@ def test_int_converter(self): self.assertEqual(ac_tester.int_converter(1, 2, '3'), (1, 2, ord('3'))) def test_unsigned_int_converter(self): - from _testcapi import UINT_MAX + from _testcapi import INT_MIN, INT_MAX, UINT_MAX with self.assertRaises(ValueError): ac_tester.unsigned_int_converter(-1) with self.assertRaises(OverflowError): @@ -3109,8 +3121,13 @@ def test_unsigned_int_converter(self): with self.assertRaises(TypeError): ac_tester.unsigned_int_converter([]) self.assertEqual(ac_tester.unsigned_int_converter(), (12, 34, 56)) - self.assertEqual(ac_tester.unsigned_int_converter(0, 0, UINT_MAX + 1), (0, 0, 0)) - self.assertEqual(ac_tester.unsigned_int_converter(0, 0, (UINT_MAX + 1) * 3 + 123), (0, 0, 123)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_int_converter(0, 0, UINT_MAX + 1), (0, 0, 0)) + self.assertEqual(ac_tester.unsigned_int_converter(0, 0, INT_MIN), (0, 0, INT_MAX + 1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_int_converter(0, 0, INT_MIN - 1), (0, 0, INT_MAX)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_int_converter(0, 0, (UINT_MAX + 1) * 3 + 123), (0, 0, 123)) def test_long_converter(self): from _testcapi import LONG_MIN, LONG_MAX @@ -3124,7 +3141,7 @@ def test_long_converter(self): self.assertEqual(ac_tester.long_converter(-1234), (-1234,)) def test_unsigned_long_converter(self): - from _testcapi import ULONG_MAX + from _testcapi import LONG_MIN, LONG_MAX, ULONG_MAX with self.assertRaises(ValueError): ac_tester.unsigned_long_converter(-1) with self.assertRaises(OverflowError): @@ -3134,8 +3151,13 @@ def test_unsigned_long_converter(self): with self.assertRaises(TypeError): ac_tester.unsigned_long_converter([]) self.assertEqual(ac_tester.unsigned_long_converter(), (12, 34, 56)) - self.assertEqual(ac_tester.unsigned_long_converter(0, 0, ULONG_MAX + 1), (0, 0, 0)) - self.assertEqual(ac_tester.unsigned_long_converter(0, 0, (ULONG_MAX + 1) * 3 + 123), (0, 0, 123)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_converter(0, 0, ULONG_MAX + 1), (0, 0, 0)) + self.assertEqual(ac_tester.unsigned_long_converter(0, 0, LONG_MIN), (0, 0, LONG_MAX + 1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_converter(0, 0, LONG_MIN - 1), (0, 0, LONG_MAX)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_converter(0, 0, (ULONG_MAX + 1) * 3 + 123), (0, 0, 123)) def test_long_long_converter(self): from _testcapi import LLONG_MIN, LLONG_MAX @@ -3149,7 +3171,7 @@ def test_long_long_converter(self): self.assertEqual(ac_tester.long_long_converter(-1234), (-1234,)) def test_unsigned_long_long_converter(self): - from _testcapi import ULLONG_MAX + from _testcapi import LLONG_MIN, LLONG_MAX, ULLONG_MAX with self.assertRaises(ValueError): ac_tester.unsigned_long_long_converter(-1) with self.assertRaises(OverflowError): @@ -3159,8 +3181,13 @@ def test_unsigned_long_long_converter(self): with self.assertRaises(TypeError): ac_tester.unsigned_long_long_converter([]) self.assertEqual(ac_tester.unsigned_long_long_converter(), (12, 34, 56)) - self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, ULLONG_MAX + 1), (0, 0, 0)) - self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, (ULLONG_MAX + 1) * 3 + 123), (0, 0, 123)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, ULLONG_MAX + 1), (0, 0, 0)) + self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, LLONG_MIN), (0, 0, LLONG_MAX + 1)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, LLONG_MIN - 1), (0, 0, LLONG_MAX)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ac_tester.unsigned_long_long_converter(0, 0, (ULLONG_MAX + 1) * 3 + 123), (0, 0, 123)) def test_py_ssize_t_converter(self): from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX diff --git a/Misc/NEWS.d/next/C_API/2025-04-17-12-37-27.gh-issue-132629.01ArwX.rst b/Misc/NEWS.d/next/C_API/2025-04-17-12-37-27.gh-issue-132629.01ArwX.rst new file mode 100644 index 00000000000000..38b7a0a493e39f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-04-17-12-37-27.gh-issue-132629.01ArwX.rst @@ -0,0 +1,4 @@ +For unsigned integer formats in :c:func:`PyArg_ParseTuple`, accepting Python +integers with value that is larger than the maximal value for the C type or +less than the minimal value for the corresponding signed integer type +of the same size is now deprecated. diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index 49c864318c8aa8..a8c32d6510604a 100644 --- a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -2372,7 +2372,22 @@ _curses_ungetmouse(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("ungetmouse", "argument 5", "int", args[4]); goto exit; } - bstate = PyLong_AsUnsignedLongMask(args[4]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[4], &bstate, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } return_value = _curses_ungetmouse_impl(module, id, x, y, z, bstate); exit: @@ -3138,7 +3153,22 @@ _curses_mousemask(PyObject *module, PyObject *arg) _PyArg_BadArgument("mousemask", "argument", "int", arg); goto exit; } - newmask = PyLong_AsUnsignedLongMask(arg); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(arg, &newmask, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } return_value = _curses_mousemask_impl(module, newmask); exit: @@ -4420,4 +4450,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=a083473003179b30 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=79ddaae4da3b80df input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index 970528ce9ea46d..68c92a86226bc9 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -746,12 +746,19 @@ unsigned_char_converter(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional; } { - unsigned long ival = PyLong_AsUnsignedLongMask(args[2]); - if (ival == (unsigned long)-1 && PyErr_Occurred()) { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned char), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { goto exit; } - else { - c = (unsigned char) ival; + if ((size_t)_bytes > sizeof(unsigned char)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } } } skip_optional: @@ -848,9 +855,21 @@ unsigned_short_converter(PyObject *module, PyObject *const *args, Py_ssize_t nar if (nargs < 3) { goto skip_optional; } - c = (unsigned short)PyLong_AsUnsignedLongMask(args[2]); - if (c == (unsigned short)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned short), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned short)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: return_value = unsigned_short_converter_impl(module, a, b, c); @@ -955,9 +974,21 @@ unsigned_int_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs if (nargs < 3) { goto skip_optional; } - c = (unsigned int)PyLong_AsUnsignedLongMask(args[2]); - if (c == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: return_value = unsigned_int_converter_impl(module, a, b, c); @@ -1042,7 +1073,22 @@ unsigned_long_converter(PyObject *module, PyObject *const *args, Py_ssize_t narg _PyArg_BadArgument("unsigned_long_converter", "argument 3", "int", args[2]); goto exit; } - c = PyLong_AsUnsignedLongMask(args[2]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } skip_optional: return_value = unsigned_long_converter_impl(module, a, b, c); @@ -1126,7 +1172,22 @@ unsigned_long_long_converter(PyObject *module, PyObject *const *args, Py_ssize_t _PyArg_BadArgument("unsigned_long_long_converter", "argument 3", "int", args[2]); goto exit; } - c = PyLong_AsUnsignedLongLongMask(args[2]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[2], &c, sizeof(unsigned long long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } skip_optional: return_value = unsigned_long_long_converter_impl(module, a, b, c); @@ -4481,4 +4542,4 @@ _testclinic_TestClass_posonly_poskw_varpos_array_no_fastcall(PyObject *type, PyO exit: return return_value; } -/*[clinic end generated code: output=84ffc31f27215baa input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6b04671afdafbecf input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_zoneinfo.c.h b/Modules/clinic/_zoneinfo.c.h index 09ac157cbfd135..19564a6c13f246 100644 --- a/Modules/clinic/_zoneinfo.c.h +++ b/Modules/clinic/_zoneinfo.c.h @@ -434,12 +434,19 @@ zoneinfo_ZoneInfo__unpickle(PyObject *type, PyTypeObject *cls, PyObject *const * } key = args[0]; { - unsigned long ival = PyLong_AsUnsignedLongMask(args[1]); - if (ival == (unsigned long)-1 && PyErr_Occurred()) { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &from_cache, sizeof(unsigned char), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { goto exit; } - else { - from_cache = (unsigned char) ival; + if ((size_t)_bytes > sizeof(unsigned char)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } } } return_value = zoneinfo_ZoneInfo__unpickle_impl((PyTypeObject *)type, cls, key, from_cache); @@ -447,4 +454,4 @@ zoneinfo_ZoneInfo__unpickle(PyObject *type, PyTypeObject *cls, PyObject *const * exit: return return_value; } -/*[clinic end generated code: output=8e9e204f390261b9 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c6df04d7b400bd7f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 602e42a4c1aaa4..ce29e0d11a45cd 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -292,9 +292,21 @@ binascii_crc_hqx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { goto exit; } - crc = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (crc == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &crc, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } return_value = binascii_crc_hqx_impl(module, &data, crc); @@ -336,9 +348,21 @@ binascii_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (nargs < 2) { goto skip_optional; } - crc = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (crc == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &crc, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: _return_value = binascii_crc32_impl(module, &data, crc); @@ -788,4 +812,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=adb855a2797c3cad input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fba6a71e0d7d092f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/fcntlmodule.c.h b/Modules/clinic/fcntlmodule.c.h index 00a929064ba700..005e9b9e12afd9 100644 --- a/Modules/clinic/fcntlmodule.c.h +++ b/Modules/clinic/fcntlmodule.c.h @@ -124,7 +124,22 @@ fcntl_ioctl(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyErr_Format(PyExc_TypeError, "ioctl() argument 2 must be int, not %T", args[1]); goto exit; } - code = PyLong_AsUnsignedLongMask(args[1]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &code, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } if (nargs < 3) { goto skip_optional; } @@ -264,4 +279,4 @@ fcntl_lockf(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=65a16bc64c7b4de4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bf84289b741e7cf6 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 3621a0625411d3..22f426c5192bf2 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -844,7 +844,22 @@ os_chflags(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * _PyArg_BadArgument("chflags", "argument 'flags'", "int", args[1]); goto exit; } - flags = PyLong_AsUnsignedLongMask(args[1]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &flags, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } if (!noptargs) { goto skip_optional_pos; } @@ -928,7 +943,22 @@ os_lchflags(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("lchflags", "argument 'flags'", "int", args[1]); goto exit; } - flags = PyLong_AsUnsignedLongMask(args[1]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &flags, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } return_value = os_lchflags_impl(module, &path, flags); exit: @@ -11373,9 +11403,21 @@ os_memfd_create(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj if (!noptargs) { goto skip_optional_pos; } - flags = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (flags == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &flags, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional_pos: return_value = os_memfd_create_impl(module, name, flags); @@ -13398,4 +13440,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=ae64df0389746258 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5341daae6581a62b input=a9049054013a1b77]*/ diff --git a/Modules/clinic/selectmodule.c.h b/Modules/clinic/selectmodule.c.h index 253ad8c9e78f00..c0a5f678ad038d 100644 --- a/Modules/clinic/selectmodule.c.h +++ b/Modules/clinic/selectmodule.c.h @@ -783,9 +783,21 @@ select_epoll_register(PyObject *self, PyObject *const *args, Py_ssize_t nargs, P if (!noptargs) { goto skip_optional_pos; } - eventmask = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (eventmask == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &eventmask, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional_pos: return_value = select_epoll_register_impl((pyEpoll_Object *)self, fd, eventmask); @@ -860,9 +872,21 @@ select_epoll_modify(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyO if (fd < 0) { goto exit; } - eventmask = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (eventmask == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &eventmask, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } return_value = select_epoll_modify_impl((pyEpoll_Object *)self, fd, eventmask); @@ -1375,4 +1399,4 @@ select_kqueue_control(PyObject *self, PyObject *const *args, Py_ssize_t nargs) #ifndef SELECT_KQUEUE_CONTROL_METHODDEF #define SELECT_KQUEUE_CONTROL_METHODDEF #endif /* !defined(SELECT_KQUEUE_CONTROL_METHODDEF) */ -/*[clinic end generated code: output=6fc20d78802511d1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2a66dd831f22c696 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/signalmodule.c.h b/Modules/clinic/signalmodule.c.h index 955861b4da37d9..b0cd9e2e561640 100644 --- a/Modules/clinic/signalmodule.c.h +++ b/Modules/clinic/signalmodule.c.h @@ -660,7 +660,22 @@ signal_pthread_kill(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("pthread_kill", "argument 1", "int", args[0]); goto exit; } - thread_id = PyLong_AsUnsignedLongMask(args[0]); + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[0], &thread_id, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } + } signalnum = PyLong_AsInt(args[1]); if (signalnum == -1 && PyErr_Occurred()) { goto exit; @@ -779,4 +794,4 @@ signal_pidfd_send_signal(PyObject *module, PyObject *const *args, Py_ssize_t nar #ifndef SIGNAL_PIDFD_SEND_SIGNAL_METHODDEF #define SIGNAL_PIDFD_SEND_SIGNAL_METHODDEF #endif /* !defined(SIGNAL_PIDFD_SEND_SIGNAL_METHODDEF) */ -/*[clinic end generated code: output=48bfaffeb25df5d2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=37ae8ebeae4178fa input=a9049054013a1b77]*/ diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 146a7e250019f0..016af258d63dea 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -1028,9 +1028,21 @@ zlib_adler32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (nargs < 2) { goto skip_optional; } - value = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (value == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &value, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: return_value = zlib_adler32_impl(module, &data, value); @@ -1080,13 +1092,37 @@ zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) { goto exit; } - adler1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); - if (adler1 == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[0], &adler1, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } - adler2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (adler2 == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &adler2, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } if (!PyLong_Check(args[2])) { _PyArg_BadArgument("adler32_combine", "argument 3", "int", args[2]); @@ -1137,9 +1173,21 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (nargs < 2) { goto skip_optional; } - value = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (value == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &value, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } skip_optional: _return_value = zlib_crc32_impl(module, &data, value); @@ -1193,13 +1241,37 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) { goto exit; } - crc1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); - if (crc1 == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[0], &crc1, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } - crc2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); - if (crc2 == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(args[1], &crc2, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } if (!PyLong_Check(args[2])) { _PyArg_BadArgument("crc32_combine", "argument 3", "int", args[2]); @@ -1239,4 +1311,4 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=3f7692eb3b5d5a0c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3054c8894aa44568 input=a9049054013a1b77]*/ diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index 90363b9dca3316..524eb54b984ca8 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -1,9 +1,9 @@ /* fcntl module */ -// Need limited C API version 3.13 for PyLong_AsInt() +// Need limited C API version 3.14 for PyLong_AsNativeBytes() in AC code #include "pyconfig.h" // Py_GIL_DISABLED #ifndef Py_GIL_DISABLED -# define Py_LIMITED_API 0x030d0000 +# define Py_LIMITED_API 0x030e0000 #endif #include "Python.h" diff --git a/PC/clinic/msvcrtmodule.c.h b/PC/clinic/msvcrtmodule.c.h index a77d0855af293f..647aadfa46bbed 100644 --- a/PC/clinic/msvcrtmodule.c.h +++ b/PC/clinic/msvcrtmodule.c.h @@ -690,9 +690,21 @@ msvcrt_SetErrorMode(PyObject *module, PyObject *arg) PyObject *return_value = NULL; unsigned int mode; - mode = (unsigned int)PyLong_AsUnsignedLongMask(arg); - if (mode == (unsigned int)-1 && PyErr_Occurred()) { - goto exit; + { + Py_ssize_t _bytes = PyLong_AsNativeBytes(arg, &mode, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) { + goto exit; + } + if ((size_t)_bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + goto exit; + } + } } return_value = msvcrt_SetErrorMode_impl(module, mode); @@ -731,4 +743,4 @@ msvcrt_SetErrorMode(PyObject *module, PyObject *arg) #ifndef MSVCRT_GETERRORMODE_METHODDEF #define MSVCRT_GETERRORMODE_METHODDEF #endif /* !defined(MSVCRT_GETERRORMODE_METHODDEF) */ -/*[clinic end generated code: output=692c6f52bb9193ce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f67eaf745685429d input=a9049054013a1b77]*/ diff --git a/Python/getargs.c b/Python/getargs.c index 0cf596285cc7b5..02f5c0e37ee42a 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -734,11 +734,20 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, values allowed */ unsigned char *p = va_arg(*p_va, unsigned char *); HANDLE_NULLABLE; - unsigned long ival = PyLong_AsUnsignedLongMask(arg); - if (ival == (unsigned long)-1 && PyErr_Occurred()) + Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned char), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (bytes < 0) { RETURN_ERR_OCCURRED; - else - *p = (unsigned char) ival; + } + if ((size_t)bytes > sizeof(unsigned char)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + RETURN_ERR_OCCURRED; + } + } break; } @@ -767,11 +776,20 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, unsigned allowed */ unsigned short *p = va_arg(*p_va, unsigned short *); HANDLE_NULLABLE; - unsigned long ival = PyLong_AsUnsignedLongMask(arg); - if (ival == (unsigned long)-1 && PyErr_Occurred()) + Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned short), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (bytes < 0) { RETURN_ERR_OCCURRED; - else - *p = (unsigned short) ival; + } + if ((size_t)bytes > sizeof(unsigned short)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + RETURN_ERR_OCCURRED; + } + } break; } @@ -800,11 +818,20 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, unsigned allowed */ unsigned int *p = va_arg(*p_va, unsigned int *); HANDLE_NULLABLE; - unsigned long ival = PyLong_AsUnsignedLongMask(arg); - if (ival == (unsigned long)-1 && PyErr_Occurred()) + Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned int), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (bytes < 0) { RETURN_ERR_OCCURRED; - else - *p = (unsigned int) ival; + } + if ((size_t)bytes > sizeof(unsigned int)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + RETURN_ERR_OCCURRED; + } + } break; } @@ -838,15 +865,23 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'k': { /* long sized bitfield */ unsigned long *p = va_arg(*p_va, unsigned long *); HANDLE_NULLABLE; - unsigned long ival; if (!PyIndex_Check(arg)) { return converterr(nullable, "int", arg, msgbuf, bufsize); } - ival = PyLong_AsUnsignedLongMask(arg); - if (ival == (unsigned long)(long)-1 && PyErr_Occurred()) { + Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (bytes < 0) { RETURN_ERR_OCCURRED; } - *p = ival; + if ((size_t)bytes > sizeof(unsigned long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + RETURN_ERR_OCCURRED; + } + } break; } @@ -864,15 +899,23 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'K': { /* long long sized bitfield */ unsigned long long *p = va_arg(*p_va, unsigned long long *); HANDLE_NULLABLE; - unsigned long long ival; if (!PyIndex_Check(arg)) { return converterr(nullable, "int", arg, msgbuf, bufsize); } - ival = PyLong_AsUnsignedLongLongMask(arg); - if (ival == (unsigned long long)(long long)-1 && PyErr_Occurred()) { + Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned long long), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (bytes < 0) { RETURN_ERR_OCCURRED; } - *p = ival; + if ((size_t)bytes > sizeof(unsigned long long)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + { + RETURN_ERR_OCCURRED; + } + } break; } diff --git a/Tools/clinic/libclinic/converters.py b/Tools/clinic/libclinic/converters.py index 39d0ac557a60f5..6e89e8de7cccf1 100644 --- a/Tools/clinic/libclinic/converters.py +++ b/Tools/clinic/libclinic/converters.py @@ -18,6 +18,7 @@ class BaseUnsignedIntConverter(CConverter): + bitwise = False def use_converter(self) -> None: if self.converter: @@ -25,6 +26,38 @@ def use_converter(self) -> None: f'{self.converter}()') def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: + if self.bitwise: + result = self.format_code(""" + {{{{ + Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) {{{{ + goto exit; + }}}} + if ((size_t)_bytes > sizeof({type})) {{{{ + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "integer value out of range", 1) < 0) + {{{{ + goto exit; + }}}} + }}}} + }}}} + """, + argname=argname, + type=self.type, + bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi)) + if self.format_unit in ('k', 'K'): + result = self.format_code(""" + if (!PyIndex_Check({argname})) {{{{ + {bad_argument} + goto exit; + }}}}""", + argname=argname, + bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi)) + result + return result + if not limited_capi: return super().parse_arg(argname, displayname, limited_capi=limited_capi) return self.format_code(""" @@ -172,13 +205,14 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st @add_legacy_c_converter('B', bitwise=True) -class unsigned_char_converter(CConverter): +class unsigned_char_converter(BaseUnsignedIntConverter): type = 'unsigned char' default_type = int format_unit = 'b' c_ignored_default = "'\0'" def converter_init(self, *, bitwise: bool = False) -> None: + self.bitwise = bitwise if bitwise: self.format_unit = 'B' @@ -206,19 +240,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st }}}} """, argname=argname) - elif self.format_unit == 'B': - return self.format_code(""" - {{{{ - unsigned long ival = PyLong_AsUnsignedLongMask({argname}); - if (ival == (unsigned long)-1 && PyErr_Occurred()) {{{{ - goto exit; - }}}} - else {{{{ - {paramname} = (unsigned char) ival; - }}}} - }}}} - """, - argname=argname) return super().parse_arg(argname, displayname, limited_capi=limited_capi) @@ -265,22 +286,12 @@ class unsigned_short_converter(BaseUnsignedIntConverter): c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: + self.bitwise = bitwise if bitwise: self.format_unit = 'H' else: self.converter = '_PyLong_UnsignedShort_Converter' - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: - if self.format_unit == 'H': - return self.format_code(""" - {paramname} = (unsigned short)PyLong_AsUnsignedLongMask({argname}); - if ({paramname} == (unsigned short)-1 && PyErr_Occurred()) {{{{ - goto exit; - }}}} - """, - argname=argname) - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - @add_legacy_c_converter('C', accept={str}) class int_converter(CConverter): @@ -336,22 +347,12 @@ class unsigned_int_converter(BaseUnsignedIntConverter): c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: + self.bitwise = bitwise if bitwise: self.format_unit = 'I' else: self.converter = '_PyLong_UnsignedInt_Converter' - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: - if self.format_unit == 'I': - return self.format_code(""" - {paramname} = (unsigned int)PyLong_AsUnsignedLongMask({argname}); - if ({paramname} == (unsigned int)-1 && PyErr_Occurred()) {{{{ - goto exit; - }}}} - """, - argname=argname) - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - class long_converter(CConverter): type = 'long' @@ -377,25 +378,12 @@ class unsigned_long_converter(BaseUnsignedIntConverter): c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: + self.bitwise = bitwise if bitwise: self.format_unit = 'k' else: self.converter = '_PyLong_UnsignedLong_Converter' - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: - if self.format_unit == 'k': - return self.format_code(""" - if (!PyIndex_Check({argname})) {{{{ - {bad_argument} - goto exit; - }}}} - {paramname} = PyLong_AsUnsignedLongMask({argname}); - """, - argname=argname, - bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), - ) - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - class long_long_converter(CConverter): type = 'long long' @@ -421,25 +409,12 @@ class unsigned_long_long_converter(BaseUnsignedIntConverter): c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: + self.bitwise = bitwise if bitwise: self.format_unit = 'K' else: self.converter = '_PyLong_UnsignedLongLong_Converter' - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: - if self.format_unit == 'K': - return self.format_code(""" - if (!PyIndex_Check({argname})) {{{{ - {bad_argument} - goto exit; - }}}} - {paramname} = PyLong_AsUnsignedLongLongMask({argname}); - """, - argname=argname, - bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), - ) - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - class Py_ssize_t_converter(CConverter): type = 'Py_ssize_t' From a93d9aaf62bb2565e9eec00a2a8d06a91305127b Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 13 Jul 2025 15:12:46 +0200 Subject: [PATCH 879/989] gh-42237: Link to complete list of codec aliases (#136625) Closes #42237 --- Doc/library/codecs.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index c5dae7c8e8fd04..1cb0e225bca043 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1065,8 +1065,15 @@ or with dictionaries as mapping tables. The following table lists the codecs by name, together with a few common aliases, and the languages for which the encoding is likely used. Neither the list of aliases nor the list of languages is meant to be exhaustive. Notice that spelling alternatives that only differ in -case or use a hyphen instead of an underscore are also valid aliases; therefore, -e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec. +case or use a hyphen instead of an underscore are also valid aliases +because they are equivalent when normalized by +:func:`~encodings.normalize_encoding`. For example, ``'utf-8'`` is a valid +alias for the ``'utf_8'`` codec. + +.. note:: + + The below table lists the most common aliases, for a complete list + refer to the source :source:`aliases.py ` file. On Windows, ``cpXXX`` codecs are available for all code pages. But only codecs listed in the following table are guarantead to exist on From 85ec3b3b503ffd5b7e45f8b3fa2cec0c10e4bef0 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 14 Jul 2025 01:33:34 +1200 Subject: [PATCH 880/989] gh-127971: fix off-by-one read beyond the end of a string during search (#132574) --- Lib/test/string_tests.py | 9 +++++++++ .../2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst | 1 + Objects/stringlib/fastsearch.h | 8 ++++---- 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 4b82d51b4508ac..1814a55b74ea0c 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -767,6 +767,15 @@ def test_replace(self): self.checkraises(TypeError, 'hello', 'replace', 42, 'h') self.checkraises(TypeError, 'hello', 'replace', 'h', 42) + def test_replacement_on_buffer_boundary(self): + # gh-127971: Check we don't read past the end of the buffer when a + # potential match misses on the last character. + any_3_nonblank_codepoints = '!!!' + seven_codepoints = any_3_nonblank_codepoints + ' ' + any_3_nonblank_codepoints + a = (' ' * 243) + seven_codepoints + (' ' * 7) + b = ' ' * 6 + chr(256) + a.replace(seven_codepoints, b) + def test_replace_uses_two_way_maxcount(self): # Test that maxcount works in _two_way_count in fastsearch.h A, B = "A"*1000, "B"*1000 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst new file mode 100644 index 00000000000000..ced7a9c9fd3e63 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst @@ -0,0 +1 @@ +Fix off-by-one read beyond the end of a string in string search. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 05e700b06258f0..b447865c986bef 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, continue; } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } else { @@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } } @@ -668,7 +668,7 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } else { @@ -677,7 +677,7 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } } From da699ed7e546ec106609a3858dd529335facaa4d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 13 Jul 2025 21:09:42 +0300 Subject: [PATCH 881/989] gh-121914: Change the names of the symbol tables for lambda and genexpr (GH-135288) Change the names of the symbol tables for lambda expressions and generator expressions to "" and "" respectively to avoid conflicts with user-defined names. --- .../pycore_global_objects_fini_generated.h | 5 -- Include/internal/pycore_global_strings.h | 5 -- .../internal/pycore_runtime_init_generated.h | 5 -- .../internal/pycore_unicodeobject_generated.h | 20 ------- Lib/symtable.py | 12 ----- Lib/test/test_symtable.py | 52 +++++++++++++++++++ ...-06-09-10-16-55.gh-issue-121914.G6Avkq.rst | 3 ++ Python/symtable.c | 10 ++-- 8 files changed, 60 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-09-10-16-55.gh-issue-121914.G6Avkq.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c461bc1786ddf4..493377b4c25040 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -904,7 +904,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dictcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest_size)); @@ -979,7 +978,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(genexpr)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_event_loop)); @@ -1055,7 +1053,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(label)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lambda)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_exc)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_node)); @@ -1071,7 +1068,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(line)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(line_buffering)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lineno)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(listcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(little)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lo)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(locale)); @@ -1218,7 +1214,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(server_hostname)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(server_side)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(session)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setpgroup)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setsid)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setsigdef)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 72c2051bd97660..5dfea2f479d5fe 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -395,7 +395,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(deterministic) STRUCT_FOR_ID(device) STRUCT_FOR_ID(dict) - STRUCT_FOR_ID(dictcomp) STRUCT_FOR_ID(difference_update) STRUCT_FOR_ID(digest) STRUCT_FOR_ID(digest_size) @@ -470,7 +469,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(func) STRUCT_FOR_ID(future) STRUCT_FOR_ID(generation) - STRUCT_FOR_ID(genexpr) STRUCT_FOR_ID(get) STRUCT_FOR_ID(get_debug) STRUCT_FOR_ID(get_event_loop) @@ -546,7 +544,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(kw2) STRUCT_FOR_ID(kwdefaults) STRUCT_FOR_ID(label) - STRUCT_FOR_ID(lambda) STRUCT_FOR_ID(last) STRUCT_FOR_ID(last_exc) STRUCT_FOR_ID(last_node) @@ -562,7 +559,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(line) STRUCT_FOR_ID(line_buffering) STRUCT_FOR_ID(lineno) - STRUCT_FOR_ID(listcomp) STRUCT_FOR_ID(little) STRUCT_FOR_ID(lo) STRUCT_FOR_ID(locale) @@ -709,7 +705,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(server_hostname) STRUCT_FOR_ID(server_side) STRUCT_FOR_ID(session) - STRUCT_FOR_ID(setcomp) STRUCT_FOR_ID(setpgroup) STRUCT_FOR_ID(setsid) STRUCT_FOR_ID(setsigdef) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index d378fcae26cf35..85ced09d29d338 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -902,7 +902,6 @@ extern "C" { INIT_ID(deterministic), \ INIT_ID(device), \ INIT_ID(dict), \ - INIT_ID(dictcomp), \ INIT_ID(difference_update), \ INIT_ID(digest), \ INIT_ID(digest_size), \ @@ -977,7 +976,6 @@ extern "C" { INIT_ID(func), \ INIT_ID(future), \ INIT_ID(generation), \ - INIT_ID(genexpr), \ INIT_ID(get), \ INIT_ID(get_debug), \ INIT_ID(get_event_loop), \ @@ -1053,7 +1051,6 @@ extern "C" { INIT_ID(kw2), \ INIT_ID(kwdefaults), \ INIT_ID(label), \ - INIT_ID(lambda), \ INIT_ID(last), \ INIT_ID(last_exc), \ INIT_ID(last_node), \ @@ -1069,7 +1066,6 @@ extern "C" { INIT_ID(line), \ INIT_ID(line_buffering), \ INIT_ID(lineno), \ - INIT_ID(listcomp), \ INIT_ID(little), \ INIT_ID(lo), \ INIT_ID(locale), \ @@ -1216,7 +1212,6 @@ extern "C" { INIT_ID(server_hostname), \ INIT_ID(server_side), \ INIT_ID(session), \ - INIT_ID(setcomp), \ INIT_ID(setpgroup), \ INIT_ID(setsid), \ INIT_ID(setsigdef), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index e516211f6c6cbc..6018d98d156a65 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1368,10 +1368,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(dictcomp); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(difference_update); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1668,10 +1664,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(genexpr); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(get); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1972,10 +1964,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(lambda); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(last); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2036,10 +2024,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(listcomp); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(little); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2624,10 +2608,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(setcomp); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(setpgroup); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/symtable.py b/Lib/symtable.py index 7a30e1ac4ca378..77475c3ffd9224 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -255,11 +255,6 @@ def is_local_symbol(ident): if is_local_symbol(st.name): match st.type: case _symtable.TYPE_FUNCTION: - # generators are of type TYPE_FUNCTION with a ".0" - # parameter as a first parameter (which makes them - # distinguishable from a function named 'genexpr') - if st.name == 'genexpr' and '.0' in st.varnames: - continue d[st.name] = 1 case _symtable.TYPE_TYPE_PARAMETERS: # Get the function-def block in the annotation @@ -267,13 +262,6 @@ def is_local_symbol(ident): scope_name = st.name for c in st.children: if c.name == scope_name and c.type == _symtable.TYPE_FUNCTION: - # A generic generator of type TYPE_FUNCTION - # cannot be a direct child of 'st' (but it - # can be a descendant), e.g.: - # - # class A: - # type genexpr[genexpr] = (x for x in []) - assert scope_name != 'genexpr' or '.0' not in c.varnames d[scope_name] = 1 break self.__methods = tuple(d) diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index 24d89b09d946ad..26c75fcbc2bd7f 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -527,6 +527,58 @@ def test_symtable_entry_repr(self): expected = f"" self.assertEqual(repr(self.top._table), expected) + def test_lambda(self): + st = symtable.symtable("lambda x: x", "?", "exec") + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertFalse(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), ["x"]) + self.assertEqual(st.get_children(), []) + + def test_nested_lambda(self): + st = symtable.symtable("lambda x: lambda y=x: y", "?", "exec") + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertFalse(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), ["x"]) + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertTrue(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), ["y"]) + self.assertEqual(st.get_children(), []) + + def test_genexpr(self): + st = symtable.symtable("(x for x in a)", "?", "exec") + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertFalse(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), [".0", "x"]) + self.assertEqual(st.get_children(), []) + + def test_nested_genexpr(self): + st = symtable.symtable("((y for y in x) for x in a)", "?", "exec") + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertFalse(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), [".0", "x"]) + self.assertEqual(len(st.get_children()), 1) + st = st.get_children()[0] + self.assertIs(st.get_type(), symtable.SymbolTableType.FUNCTION) + self.assertEqual(st.get_name(), "") + self.assertTrue(st.is_nested()) + self.assertEqual(sorted(st.get_identifiers()), [".0", "y"]) + self.assertEqual(st.get_children(), []) + class ComprehensionTests(unittest.TestCase): def get_identifiers_recursive(self, st, res): diff --git a/Misc/NEWS.d/next/Library/2025-06-09-10-16-55.gh-issue-121914.G6Avkq.rst b/Misc/NEWS.d/next/Library/2025-06-09-10-16-55.gh-issue-121914.G6Avkq.rst new file mode 100644 index 00000000000000..a1314a9cb7f943 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-09-10-16-55.gh-issue-121914.G6Avkq.rst @@ -0,0 +1,3 @@ +Changed the names of the symbol tables for lambda expressions and generator +expressions to "" and "" respectively to avoid conflicts +with user-defined names. diff --git a/Python/symtable.c b/Python/symtable.c index a3d0fff80d24a1..bcd7365f8e1f14 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -2413,7 +2413,7 @@ symtable_visit_expr(struct symtable *st, expr_ty e) VISIT_SEQ(st, expr, e->v.Lambda.args->defaults); if (e->v.Lambda.args->kw_defaults) VISIT_SEQ_WITH_NULL(st, expr, e->v.Lambda.args->kw_defaults); - if (!symtable_enter_block(st, &_Py_ID(lambda), + if (!symtable_enter_block(st, &_Py_STR(anon_lambda), FunctionBlock, (void *)e, LOCATION(e))) { return 0; } @@ -3055,7 +3055,7 @@ symtable_handle_comprehension(struct symtable *st, expr_ty e, static int symtable_visit_genexp(struct symtable *st, expr_ty e) { - return symtable_handle_comprehension(st, e, &_Py_ID(genexpr), + return symtable_handle_comprehension(st, e, &_Py_STR(anon_genexpr), e->v.GeneratorExp.generators, e->v.GeneratorExp.elt, NULL); } @@ -3063,7 +3063,7 @@ symtable_visit_genexp(struct symtable *st, expr_ty e) static int symtable_visit_listcomp(struct symtable *st, expr_ty e) { - return symtable_handle_comprehension(st, e, &_Py_ID(listcomp), + return symtable_handle_comprehension(st, e, &_Py_STR(anon_listcomp), e->v.ListComp.generators, e->v.ListComp.elt, NULL); } @@ -3071,7 +3071,7 @@ symtable_visit_listcomp(struct symtable *st, expr_ty e) static int symtable_visit_setcomp(struct symtable *st, expr_ty e) { - return symtable_handle_comprehension(st, e, &_Py_ID(setcomp), + return symtable_handle_comprehension(st, e, &_Py_STR(anon_setcomp), e->v.SetComp.generators, e->v.SetComp.elt, NULL); } @@ -3079,7 +3079,7 @@ symtable_visit_setcomp(struct symtable *st, expr_ty e) static int symtable_visit_dictcomp(struct symtable *st, expr_ty e) { - return symtable_handle_comprehension(st, e, &_Py_ID(dictcomp), + return symtable_handle_comprehension(st, e, &_Py_STR(anon_dictcomp), e->v.DictComp.generators, e->v.DictComp.key, e->v.DictComp.value); From 283b05052338dd735cd4927011afc3735d9c6c7c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Sun, 13 Jul 2025 20:30:38 +0200 Subject: [PATCH 882/989] gh-127146: Emscripten: Fix test_open_undecodable_uri by setting `-sTEXTDECODER=2` (#136624) Removes the JS text decoder fallback and gets rid of the bugs due to the differences in behavior on invalid utf8 strings. See https://github.com/emscripten-core/emscripten/issues/24690. --- configure | 1 + configure.ac | 2 ++ 2 files changed, 3 insertions(+) diff --git a/configure b/configure index 4292f33ce21dce..71d831e4d8a6bb 100755 --- a/configure +++ b/configure @@ -9606,6 +9606,7 @@ fi as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback" as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" + as_fn_append LINKFORSHARED " -sTEXTDECODER=2" if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index cc7a6e9397dded..91c26f34a524c1 100644 --- a/configure.ac +++ b/configure.ac @@ -2338,6 +2338,8 @@ AS_CASE([$ac_sys_system], AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) + dnl Avoid bugs in JS fallback string decoding path + AS_VAR_APPEND([LINKFORSHARED], [" -sTEXTDECODER=2"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) From b74fb8e220a50a9580320dfd398a16995b845c69 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 13 Jul 2025 23:27:48 +0300 Subject: [PATCH 883/989] gh-135256: Simplify parsing parameters in Argument Clinic (GH-135257) --- Lib/test/test_clinic.py | 12 ++---- Tools/clinic/libclinic/dsl_parser.py | 64 ++++++---------------------- 2 files changed, 17 insertions(+), 59 deletions(-) diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index a6ed887999242f..e83114794519d5 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1277,12 +1277,8 @@ def test_base_invalid_syntax(self): os.stat invalid syntax: int = 42 """ - err = dedent(r""" - Function 'stat' has an invalid parameter declaration: - \s+'invalid syntax: int = 42' - """).strip() - with self.assertRaisesRegex(ClinicError, err): - self.parse_function(block) + err = "Function 'stat' has an invalid parameter declaration: 'invalid syntax: int = 42'" + self.expect_failure(block, err, lineno=2) def test_param_default_invalid_syntax(self): block = """ @@ -1290,7 +1286,7 @@ def test_param_default_invalid_syntax(self): os.stat x: int = invalid syntax """ - err = r"Syntax error: 'x = invalid syntax\n'" + err = "Function 'stat' has an invalid parameter declaration:" self.expect_failure(block, err, lineno=2) def test_cloning_nonexistent_function_correctly_fails(self): @@ -2510,7 +2506,7 @@ def test_cannot_specify_pydefault_without_default(self): self.expect_failure(block, err, lineno=1) def test_vararg_cannot_take_default_value(self): - err = "Vararg can't take a default value!" + err = "Function 'fn' has an invalid parameter declaration:" block = """ fn *args: tuple = None diff --git a/Tools/clinic/libclinic/dsl_parser.py b/Tools/clinic/libclinic/dsl_parser.py index 282ff64cd33089..eca41531f7c8e9 100644 --- a/Tools/clinic/libclinic/dsl_parser.py +++ b/Tools/clinic/libclinic/dsl_parser.py @@ -877,43 +877,16 @@ def parse_parameter(self, line: str) -> None: # handle "as" for parameters too c_name = None - name, have_as_token, trailing = line.partition(' as ') - if have_as_token: - name = name.strip() - if ' ' not in name: - fields = trailing.strip().split(' ') - if not fields: - fail("Invalid 'as' clause!") - c_name = fields[0] - if c_name.endswith(':'): - name += ':' - c_name = c_name[:-1] - fields[0] = name - line = ' '.join(fields) - - default: str | None - base, equals, default = line.rpartition('=') - if not equals: - base = default - default = None - - module = None + m = re.match(r'(?:\* *)?\w+( +as +(\w+))', line) + if m: + c_name = m[2] + line = line[:m.start(1)] + line[m.end(1):] + try: - ast_input = f"def x({base}): pass" + ast_input = f"def x({line}\n): pass" module = ast.parse(ast_input) except SyntaxError: - try: - # the last = was probably inside a function call, like - # c: int(accept={str}) - # so assume there was no actual default value. - default = None - ast_input = f"def x({line}): pass" - module = ast.parse(ast_input) - except SyntaxError: - pass - if not module: - fail(f"Function {self.function.name!r} has an invalid parameter declaration:\n\t", - repr(line)) + fail(f"Function {self.function.name!r} has an invalid parameter declaration: {line!r}") function = module.body[0] assert isinstance(function, ast.FunctionDef) @@ -922,9 +895,6 @@ def parse_parameter(self, line: str) -> None: if len(function_args.args) > 1: fail(f"Function {self.function.name!r} has an " f"invalid parameter declaration (comma?): {line!r}") - if function_args.defaults or function_args.kw_defaults: - fail(f"Function {self.function.name!r} has an " - f"invalid parameter declaration (default value?): {line!r}") if function_args.kwarg: fail(f"Function {self.function.name!r} has an " f"invalid parameter declaration (**kwargs?): {line!r}") @@ -944,7 +914,7 @@ def parse_parameter(self, line: str) -> None: name = 'varpos_' + name value: object - if not default: + if not function_args.defaults: if is_vararg: value = NULL else: @@ -955,17 +925,13 @@ def parse_parameter(self, line: str) -> None: if 'py_default' in kwargs: fail("You can't specify py_default without specifying a default value!") else: - if is_vararg: - fail("Vararg can't take a default value!") + expr = function_args.defaults[0] + default = ast_input[expr.col_offset: expr.end_col_offset].strip() if self.parameter_state is ParamState.REQUIRED: self.parameter_state = ParamState.OPTIONAL - default = default.strip() bad = False - ast_input = f"x = {default}" try: - module = ast.parse(ast_input) - if 'c_default' not in kwargs: # we can only represent very simple data values in C. # detect whether default is okay, via a denylist @@ -992,13 +958,14 @@ def bad_node(self, node: ast.AST) -> None: visit_Starred = bad_node denylist = DetectBadNodes() - denylist.visit(module) + denylist.visit(expr) bad = denylist.bad else: # if they specify a c_default, we can be more lenient about the default value. # but at least make an attempt at ensuring it's a valid expression. + code = compile(ast.Expression(expr), '', 'eval') try: - value = eval(default) + value = eval(code) except NameError: pass # probably a named constant except Exception as e: @@ -1010,9 +977,6 @@ def bad_node(self, node: ast.AST) -> None: if bad: fail(f"Unsupported expression as default value: {default!r}") - assignment = module.body[0] - assert isinstance(assignment, ast.Assign) - expr = assignment.value # mild hack: explicitly support NULL as a default value c_default: str | None if isinstance(expr, ast.Name) and expr.id == 'NULL': @@ -1064,8 +1028,6 @@ def bad_node(self, node: ast.AST) -> None: else: c_default = py_default - except SyntaxError as e: - fail(f"Syntax error: {e.text!r}") except (ValueError, AttributeError): value = unknown c_default = kwargs.get("c_default") From cd3e7ac89b151d1b2d8088f6c424a064ff7e1fe9 Mon Sep 17 00:00:00 2001 From: Lee Dogeon Date: Mon, 14 Jul 2025 17:02:05 +0900 Subject: [PATCH 884/989] gh-67341: fix a typo in `Include/fileutils.h` (#136049) `IO_REPARSE_TAG_SYMLINK` is mapped to `S_IFLNK` in `fileutils.c`, not in `posixmodule.c` --- Include/fileutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/fileutils.h b/Include/fileutils.h index 1509198e45f0ca..00c37fa4602428 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -16,7 +16,7 @@ # define S_IFMT 0170000 #endif #ifndef S_IFLNK - // Windows doesn't define S_IFLNK, but posixmodule.c maps + // Windows doesn't define S_IFLNK, but fileutils.c maps // IO_REPARSE_TAG_SYMLINK to S_IFLNK. # define S_IFLNK 0120000 #endif From 5bbf30e89e7e1cb0a2b9bed9e0e99be291becbd3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:13:15 +0300 Subject: [PATCH 885/989] Partially revert "gh-101100: Fix sphinx warnings in `library/email.parser.rst` (#136475)" (#136629) --- Doc/library/email.parser.rst | 6 +++--- Doc/tools/.nitignore | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst index 90796370ebb407..6a70714dc3ee42 100644 --- a/Doc/library/email.parser.rst +++ b/Doc/library/email.parser.rst @@ -48,8 +48,8 @@ methods. FeedParser API ^^^^^^^^^^^^^^ -The :class:`BytesFeedParser`, imported from the :mod:`email.parser.FeedParser` -module, provides an API that is conducive to incremental parsing of email messages, +The :class:`BytesFeedParser`, imported from the :mod:`email.feedparser` module, +provides an API that is conducive to incremental parsing of email messages, such as would be necessary when reading the text of an email message from a source that can block (such as a socket). The :class:`BytesFeedParser` can of course be used to parse an email message fully contained in a :term:`bytes-like @@ -155,7 +155,7 @@ message body, instead setting the payload to the raw body. Read all the data from the binary file-like object *fp*, parse the resulting bytes, and return the message object. *fp* must support - both the :meth:`~io.IOBase.readline` and the :meth:`~io.TextIOBase.read` + both the :meth:`~io.IOBase.readline` and the :meth:`~io.IOBase.read` methods. The bytes contained in *fp* must be formatted as a block of :rfc:`5322` diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 510225afab8933..31b2e567f11056 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -15,6 +15,7 @@ Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/email.charset.rst +Doc/library/email.parser.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst Doc/library/importlib.rst From 75e2c5dd346d18c0e3c11d664c84d47becf1349b Mon Sep 17 00:00:00 2001 From: Garry Cairns <2401853+garry-cairns@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:32:10 +0100 Subject: [PATCH 886/989] =?UTF-8?q?gh-134567:=20Move=20unittest=20What?= =?UTF-8?q?=E2=80=99s=20New=20entry=20(#136630)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Doc/whatsnew/3.15.rst | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 2f713fbb888c30..dd0bb6bd5b86b3 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -278,6 +278,14 @@ tarfile and :cve:`2025-4435`.) +unittest +-------- + +* :func:`unittest.TestCase.assertLogs` will now accept a formatter + to control how messages are formatted. + (Contributed by Garry Cairns in :gh:`134567`.) + + zlib ---- @@ -388,15 +396,6 @@ typing (Contributed by Bénédikt Tran in :gh:`133823`.) -unittest --------- - -* Lets users specify formatter in TestCase.assertLogs. - :func:`unittest.TestCase.assertLogs` will now accept a formatter - to control how messages are formatted. - (Contributed by Garry Cairns in :gh:`134567`.) - - wave ---- From a68ddea3bf7e9bb77d096c613bce2ec1e67a28f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:49:34 +0200 Subject: [PATCH 887/989] gh-90733: improve `hashlib.scrypt` interface (#136100) * add `scrypt` to `hashlib.__all__` * improve `hashlib.scrypt` exception messages --- Lib/hashlib.py | 3 +- Lib/test/test_hashlib.py | 117 +++++++++++------- ...5-06-29-15-22-13.gh-issue-90733.NiquaA.rst | 2 + Modules/_hashopenssl.c | 76 +++++++----- Modules/clinic/_hashopenssl.c.h | 10 +- 5 files changed, 120 insertions(+), 88 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-29-15-22-13.gh-issue-90733.NiquaA.rst diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 6d4f84a5a6e9ff..6c72fba03bf687 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -187,7 +187,8 @@ def __hash_new(name, *args, **kwargs): try: # OpenSSL's scrypt requires OpenSSL 1.1+ - from _hashlib import scrypt # noqa: F401 + from _hashlib import scrypt + __all__ += ('scrypt',) except ImportError: pass diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 5bad483ae9dafc..65e18639f82be5 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1184,12 +1184,6 @@ class KDFTests(unittest.TestCase): (b'pass\0word', b'sa\0lt', 4096, 16), ] - scrypt_test_vectors = [ - (b'', b'', 16, 1, 1, unhexlify('77d6576238657b203b19ca42c18a0497f16b4844e3074ae8dfdffa3fede21442fcd0069ded0948f8326a753a0fc81f17e8d3e0fb2e0d3628cf35e20c38d18906')), - (b'password', b'NaCl', 1024, 8, 16, unhexlify('fdbabe1c9d3472007856e7190d01e9fe7c6ad7cbc8237830e77376634b3731622eaf30d92e22a3886ff109279d9830dac727afb94a83ee6d8360cbdfa2cc0640')), - (b'pleaseletmein', b'SodiumChloride', 16384, 8, 1, unhexlify('7023bdcb3afd7348461c06cd81fd38ebfda8fbba904f8e3ea9b543f6545da1f2d5432955613f0fcf62d49705242a9af9e61e85dc0d651e40dfcf017b45575887')), - ] - pbkdf2_results = { "sha1": [ # official test vectors from RFC 6070 @@ -1281,46 +1275,6 @@ def _test_pbkdf2_hmac(self, pbkdf2, supported): def test_pbkdf2_hmac_c(self): self._test_pbkdf2_hmac(openssl_hashlib.pbkdf2_hmac, openssl_md_meth_names) - @unittest.skipUnless(hasattr(hashlib, 'scrypt'), - ' test requires OpenSSL > 1.1') - @unittest.skipIf(get_fips_mode(), reason="scrypt is blocked in FIPS mode") - def test_scrypt(self): - for password, salt, n, r, p, expected in self.scrypt_test_vectors: - result = hashlib.scrypt(password, salt=salt, n=n, r=r, p=p) - self.assertEqual(result, expected) - - # this values should work - hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1) - # password and salt must be bytes-like - with self.assertRaises(TypeError): - hashlib.scrypt('password', salt=b'salt', n=2, r=8, p=1) - with self.assertRaises(TypeError): - hashlib.scrypt(b'password', salt='salt', n=2, r=8, p=1) - # require keyword args - with self.assertRaises(TypeError): - hashlib.scrypt(b'password') - with self.assertRaises(TypeError): - hashlib.scrypt(b'password', b'salt') - with self.assertRaises(TypeError): - hashlib.scrypt(b'password', 2, 8, 1, salt=b'salt') - for n in [-1, 0, 1, None]: - with self.assertRaises((ValueError, OverflowError, TypeError)): - hashlib.scrypt(b'password', salt=b'salt', n=n, r=8, p=1) - for r in [-1, 0, None]: - with self.assertRaises((ValueError, OverflowError, TypeError)): - hashlib.scrypt(b'password', salt=b'salt', n=2, r=r, p=1) - for p in [-1, 0, None]: - with self.assertRaises((ValueError, OverflowError, TypeError)): - hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=p) - for maxmem in [-1, None]: - with self.assertRaises((ValueError, OverflowError, TypeError)): - hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1, - maxmem=maxmem) - for dklen in [-1, None]: - with self.assertRaises((ValueError, OverflowError, TypeError)): - hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1, - dklen=dklen) - def test_normalized_name(self): self.assertNotIn("blake2b512", hashlib.algorithms_available) self.assertNotIn("sha3-512", hashlib.algorithms_available) @@ -1362,5 +1316,76 @@ def readable(self): hashlib.file_digest(NonBlocking(), hashlib.sha256) +@unittest.skipUnless(hasattr(hashlib, 'scrypt'), 'requires OpenSSL 1.1+') +@unittest.skipIf(get_fips_mode(), reason="scrypt is blocked in FIPS mode") +class TestScrypt(unittest.TestCase): + + scrypt_test_vectors = [ + (b'', b'', 16, 1, 1, unhexlify('77d6576238657b203b19ca42c18a0497f16b4844e3074ae8dfdffa3fede21442fcd0069ded0948f8326a753a0fc81f17e8d3e0fb2e0d3628cf35e20c38d18906')), + (b'password', b'NaCl', 1024, 8, 16, unhexlify('fdbabe1c9d3472007856e7190d01e9fe7c6ad7cbc8237830e77376634b3731622eaf30d92e22a3886ff109279d9830dac727afb94a83ee6d8360cbdfa2cc0640')), + (b'pleaseletmein', b'SodiumChloride', 16384, 8, 1, unhexlify('7023bdcb3afd7348461c06cd81fd38ebfda8fbba904f8e3ea9b543f6545da1f2d5432955613f0fcf62d49705242a9af9e61e85dc0d651e40dfcf017b45575887')), + ] + + def test_scrypt(self): + for password, salt, n, r, p, expected in self.scrypt_test_vectors: + result = hashlib.scrypt(password, salt=salt, n=n, r=r, p=p) + self.assertEqual(result, expected) + + # these parameters must be valid + hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1) + hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1, maxmem=0) + hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1, dklen=1) + + def test_scrypt_types(self): + # password and salt must be bytes-like + with self.assertRaises(TypeError): + hashlib.scrypt('password', salt=b'salt', n=2, r=8, p=1) + with self.assertRaises(TypeError): + hashlib.scrypt(b'password', salt='salt', n=2, r=8, p=1) + # require keyword args + with self.assertRaises(TypeError): + hashlib.scrypt(b'password') + with self.assertRaises(TypeError): + hashlib.scrypt(b'password', b'salt') + with self.assertRaises(TypeError): + hashlib.scrypt(b'password', 2, 8, 1, salt=b'salt') + + def test_scrypt_validate(self): + def scrypt(password=b"password", /, **kwargs): + # overwrite well-defined parameters with bad ones + kwargs = dict(salt=b'salt', n=2, r=8, p=1) | kwargs + return hashlib.scrypt(password, **kwargs) + + for param_name in ('n', 'r', 'p', 'maxmem', 'dklen'): + param = {param_name: None} + with self.subTest(**param): + self.assertRaises(TypeError, scrypt, **param) + + self.assertRaises(ValueError, scrypt, n=0) + self.assertRaises(ValueError, scrypt, n=-1) + self.assertRaises(ValueError, scrypt, n=1) + + self.assertRaises(ValueError, scrypt, r=0) + self.assertRaises(ValueError, scrypt, r=-1) + + self.assertRaises(ValueError, scrypt, p=-1) + self.assertRaises(ValueError, scrypt, p=0) + + self.assertRaises(ValueError, scrypt, maxmem=-1) + # OpenSSL hard limit for 'maxmem' is an 'uint64_t' but for now, + # we do not use the 'uint64' Clinic converter but the 'long' one. + self.assertRaises(OverflowError, scrypt, maxmem=(1 << 64)) + # Historically, Python allowed 'maxmem' to be at most INT_MAX, + # which is at most 2**32-1 (on Windows, sizeof(long) == 4, so + # an OverflowError will be raised instead of a ValueError). + numeric_exc_types = (OverflowError, ValueError) + self.assertRaises(numeric_exc_types, scrypt, maxmem=(1 << 32)) + + self.assertRaises(ValueError, scrypt, dklen=-1) + self.assertRaises(ValueError, scrypt, dklen=0) + MAX_DKLEN = ((1 << 32) - 1) * 32 # see RFC 7914 + self.assertRaises(numeric_exc_types, scrypt, dklen=MAX_DKLEN + 1) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-06-29-15-22-13.gh-issue-90733.NiquaA.rst b/Misc/NEWS.d/next/Library/2025-06-29-15-22-13.gh-issue-90733.NiquaA.rst new file mode 100644 index 00000000000000..cba930c5860522 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-29-15-22-13.gh-issue-90733.NiquaA.rst @@ -0,0 +1,2 @@ +Improve error messages when reporting invalid parameters in +:func:`hashlib.scrypt`. Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 1a6c831e48377b..6f04d9ec0c479f 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -48,7 +48,6 @@ #define MUNCH_SIZE INT_MAX -#define PY_OPENSSL_HAS_SCRYPT 1 #if defined(NID_sha3_224) && defined(NID_sha3_256) && defined(NID_sha3_384) && defined(NID_sha3_512) #define PY_OPENSSL_HAS_SHA3 1 #endif @@ -1557,7 +1556,25 @@ pbkdf2_hmac_impl(PyObject *module, const char *hash_name, return key_obj; } -#ifdef PY_OPENSSL_HAS_SCRYPT +// --- PBKDF: scrypt (RFC 7914) ----------------------------------------------- + +/* + * By default, OpenSSL 1.1.0 restricts 'maxmem' in EVP_PBE_scrypt() + * to 32 MiB (1024 * 1024 * 32) but only if 'maxmem = 0' and allows + * for an arbitrary large limit fitting on an uint64_t otherwise. + * + * For legacy reasons, we limited 'maxmem' to be at most INTMAX, + * but if users need a more relaxed value, we will revisit this + * limit in the future. + */ +#define HASHLIB_SCRYPT_MAX_MAXMEM INT_MAX + +/* + * Limit 'dklen' to INT_MAX even if it can be at most (32 * UINT32_MAX). + * + * See https://datatracker.ietf.org/doc/html/rfc7914.html for details. + */ +#define HASHLIB_SCRYPT_MAX_DKLEN INT_MAX /*[clinic input] _hashlib.scrypt @@ -1571,7 +1588,6 @@ _hashlib.scrypt maxmem: long = 0 dklen: long = 64 - scrypt password-based key derivation function. [clinic start generated code]*/ @@ -1579,77 +1595,73 @@ static PyObject * _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, unsigned long n, unsigned long r, unsigned long p, long maxmem, long dklen) -/*[clinic end generated code: output=d424bc3e8c6b9654 input=0c9a84230238fd79]*/ +/*[clinic end generated code: output=d424bc3e8c6b9654 input=bdeac9628d07f7a1]*/ { - PyObject *key_obj = NULL; - char *key; + PyObject *key = NULL; int retval; if (password->len > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "password is too long."); + PyErr_SetString(PyExc_OverflowError, "password is too long"); return NULL; } if (salt->len > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "salt is too long."); + PyErr_SetString(PyExc_OverflowError, "salt is too long"); return NULL; } if (n < 2 || n & (n - 1)) { - PyErr_SetString(PyExc_ValueError, - "n must be a power of 2."); + PyErr_SetString(PyExc_ValueError, "n must be a power of 2"); return NULL; } - if (maxmem < 0 || maxmem > INT_MAX) { - /* OpenSSL 1.1.0 restricts maxmem to 32 MiB. It may change in the - future. The maxmem constant is private to OpenSSL. */ + if (maxmem < 0 || maxmem > HASHLIB_SCRYPT_MAX_MAXMEM) { PyErr_Format(PyExc_ValueError, - "maxmem must be positive and smaller than %d", - INT_MAX); + "maxmem must be positive and at most %d", + HASHLIB_SCRYPT_MAX_MAXMEM); return NULL; } - if (dklen < 1 || dklen > INT_MAX) { + if (dklen < 1 || dklen > HASHLIB_SCRYPT_MAX_DKLEN) { PyErr_Format(PyExc_ValueError, - "dklen must be greater than 0 and smaller than %d", - INT_MAX); + "dklen must be at least 1 and at most %d", + HASHLIB_SCRYPT_MAX_DKLEN); return NULL; } /* let OpenSSL validate the rest */ - retval = EVP_PBE_scrypt(NULL, 0, NULL, 0, n, r, p, maxmem, NULL, 0); + retval = EVP_PBE_scrypt(NULL, 0, NULL, 0, n, r, p, + (uint64_t)maxmem, NULL, 0); if (!retval) { - notify_ssl_error_occurred( - "Invalid parameter combination for n, r, p, maxmem."); + notify_ssl_error_occurred("invalid parameter combination for " + "n, r, p, and maxmem"); return NULL; } - key_obj = PyBytes_FromStringAndSize(NULL, dklen); - if (key_obj == NULL) { + key = PyBytes_FromStringAndSize(NULL, dklen); + if (key == NULL) { return NULL; } - key = PyBytes_AS_STRING(key_obj); Py_BEGIN_ALLOW_THREADS retval = EVP_PBE_scrypt( - (const char*)password->buf, (size_t)password->len, + (const char *)password->buf, (size_t)password->len, (const unsigned char *)salt->buf, (size_t)salt->len, - n, r, p, maxmem, - (unsigned char *)key, (size_t)dklen + (uint64_t)n, (uint64_t)r, (uint64_t)p, (uint64_t)maxmem, + (unsigned char *)PyBytes_AS_STRING(key), (size_t)dklen ); Py_END_ALLOW_THREADS if (!retval) { - Py_CLEAR(key_obj); + Py_DECREF(key); notify_ssl_error_occurred_in(Py_STRINGIFY(EVP_PBE_scrypt)); return NULL; } - return key_obj; + return key; } -#endif /* PY_OPENSSL_HAS_SCRYPT */ + +#undef HASHLIB_SCRYPT_MAX_DKLEN +#undef HASHLIB_SCRYPT_MAX_MAXMEM /* Fast HMAC for hmac.digest() */ diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index 61ea10e2a48284..3d81a6dcce1440 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -1492,8 +1492,6 @@ pbkdf2_hmac(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject return return_value; } -#if defined(PY_OPENSSL_HAS_SCRYPT) - PyDoc_STRVAR(_hashlib_scrypt__doc__, "scrypt($module, /, password, *, salt, n, r, p, maxmem=0, dklen=64)\n" "--\n" @@ -1601,8 +1599,6 @@ _hashlib_scrypt(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -#endif /* defined(PY_OPENSSL_HAS_SCRYPT) */ - PyDoc_STRVAR(_hashlib_hmac_singleshot__doc__, "hmac_digest($module, /, key, msg, digest)\n" "--\n" @@ -1980,8 +1976,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_OPENSSL_SHAKE_256_METHODDEF #define _HASHLIB_OPENSSL_SHAKE_256_METHODDEF #endif /* !defined(_HASHLIB_OPENSSL_SHAKE_256_METHODDEF) */ - -#ifndef _HASHLIB_SCRYPT_METHODDEF - #define _HASHLIB_SCRYPT_METHODDEF -#endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=29f4aaf01714778e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=cd5ff436f6dc2938 input=a9049054013a1b77]*/ From 3d8c38f6db0fea7845aafb92fe6bc795b536a367 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 14 Jul 2025 10:14:20 -0700 Subject: [PATCH 888/989] GH-135904: Improve the JIT's performance on macOS (GH-136528) --- Python/jit.c | 11 ++++--- Tools/jit/_optimizers.py | 40 +++++++----------------- Tools/jit/_targets.py | 67 ++++++++++++++++++++++++++-------------- Tools/jit/jit.h | 4 +++ Tools/jit/shim.c | 4 +-- Tools/jit/template.c | 8 ++--- 6 files changed, 73 insertions(+), 61 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index e232cc1f7d9250..01bc0076497c6d 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -431,8 +431,10 @@ void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *s #if defined(__aarch64__) || defined(_M_ARM64) #define TRAMPOLINE_SIZE 16 + #define DATA_ALIGN 8 #else #define TRAMPOLINE_SIZE 0 + #define DATA_ALIGN 1 #endif // Generate and patch AArch64 trampolines. The symbols to jump to are stored @@ -522,8 +524,9 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz // Round up to the nearest page: size_t page_size = get_page_size(); assert((page_size & (page_size - 1)) == 0); - size_t padding = page_size - ((code_size + state.trampolines.size + data_size) & (page_size - 1)); - size_t total_size = code_size + state.trampolines.size + data_size + padding; + size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1)); + size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1)); + size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding; unsigned char *memory = jit_alloc(total_size); if (memory == NULL) { return -1; @@ -545,7 +548,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz // Loop again to emit the code: unsigned char *code = memory; state.trampolines.mem = memory + code_size; - unsigned char *data = memory + code_size + state.trampolines.size; + unsigned char *data = memory + code_size + state.trampolines.size + code_padding; // Compile the shim, which handles converting between the native // calling convention and the calling convention used by jitted code // (which may be different for efficiency reasons). @@ -567,7 +570,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz code += group->code_size; data += group->data_size; assert(code == memory + code_size); - assert(data == memory + code_size + state.trampolines.size + data_size); + assert(data == memory + code_size + state.trampolines.size + code_padding + data_size); #ifdef MAP_JIT pthread_jit_write_protect_np(1); #endif diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 1077e4106fdfbd..33db110b728dba 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -70,21 +70,21 @@ class Optimizer: path: pathlib.Path _: dataclasses.KW_ONLY - # prefix used to mangle symbols on some platforms: - prefix: str = "" + # Prefixes used to mangle local labels and symbols: + label_prefix: str + symbol_prefix: str # The first block in the linked list: _root: _Block = dataclasses.field(init=False, default_factory=_Block) _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) # No groups: _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( - r"\s*(?:\.|#|//|$)" + r"\s*(?:\.|#|//|;|$)" ) # One group (label): _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( r'\s*(?P