From c89a66feb12110e68e63a6293e3ed9c9fd180412 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 15 Jul 2025 10:45:41 +0100 Subject: [PATCH 1/8] GH-133711: Enable UTF-8 mode by default (PEP 686) (#133712) Co-authored-by: Victor Stinner --- Doc/c-api/init_config.rst | 4 +- Doc/library/os.rst | 37 +++++++++---------- Doc/using/windows.rst | 29 +++++++++------ Doc/whatsnew/3.15.rst | 26 ++++++++++++- Include/cpython/initconfig.h | 13 +++---- Lib/locale.py | 3 +- Lib/subprocess.py | 3 +- Lib/test/test_cmd_line.py | 7 +++- Lib/test/test_embed.py | 10 +---- Lib/test/test_utf8_mode.py | 6 +-- ...-05-08-22-19-10.gh-issue-133711.e91wUy.rst | 2 + Programs/_testembed.c | 4 +- Python/initconfig.c | 2 +- Python/preconfig.c | 32 ++++------------ 14 files changed, 93 insertions(+), 85 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 4fd10224262488..24be9ead3874d1 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -975,9 +975,7 @@ PyPreConfig Set to ``0`` or ``1`` by the :option:`-X utf8 <-X>` command line option and the :envvar:`PYTHONUTF8` environment variable. - Also set to ``1`` if the ``LC_CTYPE`` locale is ``C`` or ``POSIX``. - - Default: ``-1`` in Python config and ``0`` in isolated config. + Default: ``1``. .. _c-preinit: diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 1e54cfec609bd2..45ec6c7a51b7b0 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -108,6 +108,12 @@ Python UTF-8 Mode .. versionadded:: 3.7 See :pep:`540` for more details. +.. versionchanged:: next + + Python UTF-8 mode is now enabled by default (:pep:`686`). + It may be disabled by setting :envvar:`PYTHONUTF8=0 <PYTHONUTF8>` as + an environment variable or by using the :option:`-X utf8=0 <-X>` command line option. 
+ The Python UTF-8 Mode ignores the :term:`locale encoding` and forces the usage of the UTF-8 encoding: @@ -139,31 +145,22 @@ level APIs also exhibit different default behaviours: default so that attempting to open a binary file in text mode is likely to raise an exception rather than producing nonsense data. -The :ref:`Python UTF-8 Mode ` is enabled if the LC_CTYPE locale is -``C`` or ``POSIX`` at Python startup (see the :c:func:`PyConfig_Read` -function). - -It can be enabled or disabled using the :option:`-X utf8 <-X>` command line -option and the :envvar:`PYTHONUTF8` environment variable. - -If the :envvar:`PYTHONUTF8` environment variable is not set at all, then the -interpreter defaults to using the current locale settings, *unless* the current -locale is identified as a legacy ASCII-based locale (as described for -:envvar:`PYTHONCOERCECLOCALE`), and locale coercion is either disabled or -fails. In such legacy locales, the interpreter will default to enabling UTF-8 -mode unless explicitly instructed not to do so. - -The Python UTF-8 Mode can only be enabled at the Python startup. Its value +The :ref:`Python UTF-8 Mode ` is enabled by default. +It can be disabled using the :option:`-X utf8=0 <-X>` command line +option or the :envvar:`PYTHONUTF8=0 ` environment variable. +The Python UTF-8 Mode can only be disabled at Python startup. Its value can be read from :data:`sys.flags.utf8_mode `. +If the UTF-8 mode is disabled, the interpreter defaults to using +the current locale settings, *unless* the current locale is identified +as a legacy ASCII-based locale (as described for :envvar:`PYTHONCOERCECLOCALE`), +and locale coercion is either disabled or fails. +In such legacy locales, the interpreter will default to enabling UTF-8 mode +unless explicitly instructed not to do so. + See also the :ref:`UTF-8 mode on Windows ` and the :term:`filesystem encoding and error handler`. -.. seealso:: - - :pep:`686` - Python 3.15 will make :ref:`utf8-mode` default. - .. 
_os-procinfo: diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 9628da3d2f6b12..7cc50bccb3724a 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -1006,6 +1006,9 @@ UTF-8 mode ========== .. versionadded:: 3.7 +.. versionchanged:: next + + Python UTF-8 mode is now enabled by default (:pep:`686`). Windows still uses legacy encodings for the system encoding (the ANSI Code Page). Python uses it for the default encoding of text files (e.g. @@ -1014,20 +1017,22 @@ Page). Python uses it for the default encoding of text files (e.g. This may cause issues because UTF-8 is widely used on the internet and most Unix systems, including WSL (Windows Subsystem for Linux). -You can use the :ref:`Python UTF-8 Mode ` to change the default text -encoding to UTF-8. You can enable the :ref:`Python UTF-8 Mode ` via -the ``-X utf8`` command line option, or the ``PYTHONUTF8=1`` environment -variable. See :envvar:`PYTHONUTF8` for enabling UTF-8 mode, and -:ref:`setting-envvars` for how to modify environment variables. - -When the :ref:`Python UTF-8 Mode ` is enabled, you can still use the +The :ref:`Python UTF-8 Mode `, enabled by default, can help by +changing the default text encoding to UTF-8. +When the :ref:`UTF-8 mode ` is enabled, you can still use the system encoding (the ANSI Code Page) via the "mbcs" codec. -Note that adding ``PYTHONUTF8=1`` to the default environment variables -will affect all Python 3.7+ applications on your system. -If you have any Python 3.7+ applications which rely on the legacy -system encoding, it is recommended to set the environment variable -temporarily or use the ``-X utf8`` command line option. +You can disable the :ref:`Python UTF-8 Mode ` via +the ``-X utf8=0`` command line option, or the ``PYTHONUTF8=0`` environment +variable. See :envvar:`PYTHONUTF8` for disabling UTF-8 mode, and +:ref:`setting-envvars` for how to modify environment variables. + +.. 
hint:: + Adding ``PYTHONUTF8={0,1}`` to the default environment variables + will affect all Python 3.7+ applications on your system. + If you have any Python 3.7+ applications which rely on the legacy + system encoding, it is recommended to set the environment variable + temporarily or use the ``-X utf8`` command line option. .. note:: Even when UTF-8 mode is disabled, Python uses UTF-8 by default diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index dd0bb6bd5b86b3..fe3d45b83a512e 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -172,11 +172,35 @@ production systems where traditional profiling approaches would be too intrusive Other language changes ====================== +* Python now uses UTF-8_ as the default encoding, independent of the system's + environment. This means that I/O operations without an explicit encoding, + e.g. ``open('flying-circus.txt')``, will use UTF-8. + UTF-8 is a widely-supported Unicode_ character encoding that has become a + *de facto* standard for representing text, including nearly every webpage + on the internet, many common file formats, programming languages, and more. + + This only applies when no ``encoding`` argument is given. For best + compatibility between versions of Python, ensure that an explicit ``encoding`` + argument is always provided. The :ref:`opt-in encoding warning <io-encoding-warning>` + can be used to identify code that may be affected by this change. + The special ``encoding='locale'`` argument uses the current locale + encoding, and has been supported since Python 3.10. + + To retain the previous behaviour, Python's UTF-8 mode may be disabled with + the :envvar:`PYTHONUTF8=0 <PYTHONUTF8>` environment variable or the + :option:`-X utf8=0 <-X>` command line option. + + .. seealso:: :pep:`686` for further details. + + .. _UTF-8: https://en.wikipedia.org/wiki/UTF-8 + .. _Unicode: https://home.unicode.org/ + + (Contributed by Adam Turner in :gh:`133711`; PEP 686 written by Inada Naoki.) 
+ * Several error messages incorrectly using the term "argument" have been corrected. (Contributed by Stan Ulbrych in :gh:`133382`.) - New modules =========== diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 7ce4acfeb7177d..1c979d91a40850 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -102,15 +102,14 @@ typedef struct PyPreConfig { /* Enable UTF-8 mode? (PEP 540) - Disabled by default (equals to 0). + If equal to 1, use the UTF-8 encoding and use "surrogateescape" for the + stdin & stdout error handlers. - Set to 1 by "-X utf8" and "-X utf8=1" command line options. - Set to 1 by PYTHONUTF8=1 environment variable. + Enabled by default (equal to 1; PEP 686), or if Py_UTF8Mode=1, + or if "-X utf8=1" or PYTHONUTF8=1. - Set to 0 by "-X utf8=0" and PYTHONUTF8=0. - - If equals to -1, it is set to 1 if the LC_CTYPE locale is "C" or - "POSIX", otherwise it is set to 0. Inherit Py_UTF8Mode value value. */ + Set to 0 by "-X utf8=0" or PYTHONUTF8=0. + */ int utf8_mode; /* If non-zero, enable the Python Development Mode. diff --git a/Lib/locale.py b/Lib/locale.py index dfedc6386cb891..0bde7ed51c66c1 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -651,7 +651,8 @@ def getpreferredencoding(do_setlocale=True): if sys.flags.warn_default_encoding: import warnings warnings.warn( - "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.", + "UTF-8 Mode affects locale.getpreferredencoding(). 
" + "Consider locale.getencoding() instead.", EncodingWarning, 2) if sys.flags.utf8_mode: return 'utf-8' diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 54c2eb515b60da..79251bd5310223 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -380,8 +380,7 @@ def _text_encoding(): if sys.flags.utf8_mode: return "utf-8" - else: - return locale.getencoding() + return locale.getencoding() def call(*popenargs, timeout=None, **kwargs): diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index c17d749d4a17ed..f30a1874ab96d4 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -300,6 +300,10 @@ def run_utf8_mode(arg): cmd = [sys.executable, '-X', 'utf8', '-c', code, arg] return subprocess.run(cmd, stdout=subprocess.PIPE, text=True) + def run_no_utf8_mode(arg): + cmd = [sys.executable, '-X', 'utf8=0', '-c', code, arg] + return subprocess.run(cmd, stdout=subprocess.PIPE, text=True) + valid_utf8 = 'e:\xe9, euro:\u20ac, non-bmp:\U0010ffff'.encode('utf-8') # invalid UTF-8 byte sequences with a valid UTF-8 sequence # in the middle. 
@@ -312,7 +316,8 @@ def run_utf8_mode(arg): ) test_args = [valid_utf8, invalid_utf8] - for run_cmd in (run_default, run_c_locale, run_utf8_mode): + for run_cmd in (run_default, run_c_locale, run_utf8_mode, + run_no_utf8_mode): with self.subTest(run_cmd=run_cmd): for arg in test_args: proc = run_cmd(arg) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 89f4aebe28f4a1..22dfdb6bb6f138 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -543,7 +543,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'configure_locale': True, 'coerce_c_locale': False, 'coerce_c_locale_warn': False, - 'utf8_mode': False, + 'utf8_mode': True, } if MS_WINDOWS: PRE_CONFIG_COMPAT.update({ @@ -560,7 +560,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): configure_locale=False, isolated=True, use_environment=False, - utf8_mode=False, + utf8_mode=True, dev_mode=False, coerce_c_locale=False, ) @@ -805,12 +805,6 @@ def get_expected_config(self, expected_preconfig, expected, 'stdio_encoding', 'stdio_errors'): expected[key] = self.IGNORE_CONFIG - if not expected_preconfig['configure_locale']: - # UTF-8 Mode depends on the locale. There is no easy way - # to guess if UTF-8 Mode will be enabled or not if the locale - # is not configured. 
- expected_preconfig['utf8_mode'] = self.IGNORE_CONFIG - if expected_preconfig['utf8_mode'] == 1: if expected['filesystem_encoding'] is self.GET_DEFAULT_CONFIG: expected['filesystem_encoding'] = 'utf-8' diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index f66881044e16df..b8e49440c9f7da 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -89,8 +89,8 @@ def test_env_var(self): # the UTF-8 mode if not self.posix_locale(): # PYTHONUTF8 should be ignored if -E is used - out = self.get_output('-E', '-c', code, PYTHONUTF8='1') - self.assertEqual(out, '0') + out = self.get_output('-E', '-c', code, PYTHONUTF8='0') + self.assertEqual(out, '1') # invalid mode out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True) @@ -116,7 +116,7 @@ def test_filesystemencoding(self): # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode # and has the priority over -X utf8 and PYTHONUTF8 out = self.get_output('-X', 'utf8', '-c', code, - PYTHONUTF8='strict', + PYTHONUTF8='xxx', PYTHONLEGACYWINDOWSFSENCODING='1') self.assertEqual(out, 'mbcs/replace') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst new file mode 100644 index 00000000000000..c8d3d62763dc12 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst @@ -0,0 +1,2 @@ +Implement :pep:`686`: Enable :ref:`Python UTF-8 Mode ` by +default. Patch by Adam Turner. 
diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 577da65c7cdafa..88936bbc699c30 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1854,9 +1854,9 @@ static int test_initconfig_get_api(void) assert(initconfig_getint(config, "dev_mode") == 1); // test PyInitConfig_GetInt() on a PyPreConfig option - assert(initconfig_getint(config, "utf8_mode") == 0); - assert(PyInitConfig_SetInt(config, "utf8_mode", 1) == 0); assert(initconfig_getint(config, "utf8_mode") == 1); + assert(PyInitConfig_SetInt(config, "utf8_mode", 0) == 0); + assert(initconfig_getint(config, "utf8_mode") == 0); // test PyInitConfig_GetStr() char *str; diff --git a/Python/initconfig.c b/Python/initconfig.c index 73a9a9bf1ca460..cc0db19d416058 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -459,7 +459,7 @@ static const char usage_envvars[] = /* --- Global configuration variables ----------------------------- */ -/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change +/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change stdin and stdout error handler to "surrogateescape". */ int Py_UTF8Mode = 0; int Py_DebugFlag = 0; /* Needed by parser.c */ diff --git a/Python/preconfig.c b/Python/preconfig.c index 67b2d2f2dc186d..e4cd10d9e3d40d 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -291,12 +291,12 @@ _PyPreConfig_InitCompatConfig(PyPreConfig *config) config->use_environment = -1; config->configure_locale = 1; - /* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540) - are disabled by default using the Compat configuration. + /* gh-80624: C locale coercion (PEP 538) is disabled by default using + the Compat configuration. - Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable + Py_UTF8Mode=0 disables the UTF-8 mode. PYTHONUTF8 environment variable is ignored (even if use_environment=1). 
*/ - config->utf8_mode = 0; + config->utf8_mode = 1; config->coerce_c_locale = 0; config->coerce_c_locale_warn = 0; @@ -317,8 +317,8 @@ PyPreConfig_InitPythonConfig(PyPreConfig *config) config->isolated = 0; config->parse_argv = 1; config->use_environment = 1; - /* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540) - depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE + /* Set to -1 to enable C locale coercion (PEP 538) depending on + the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE environment variables. */ config->coerce_c_locale = -1; config->coerce_c_locale_warn = -1; @@ -338,7 +338,7 @@ PyPreConfig_InitIsolatedConfig(PyPreConfig *config) config->configure_locale = 0; config->isolated = 1; config->use_environment = 0; - config->utf8_mode = 0; + config->utf8_mode = 1; config->dev_mode = 0; #ifdef MS_WINDOWS config->legacy_windows_fs_encoding = 0; @@ -649,23 +649,7 @@ preconfig_init_utf8_mode(PyPreConfig *config, const _PyPreCmdline *cmdline) return _PyStatus_OK(); } - -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - config->utf8_mode = 1; - } - } -#endif - - if (config->utf8_mode < 0) { - config->utf8_mode = 0; - } + config->utf8_mode = 1; return _PyStatus_OK(); } From 5b969fd64502a6e2ba6513e2b18beaeae58b8aa1 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 15 Jul 2025 10:56:42 +0100 Subject: [PATCH 2/8] GH-132661: Add ``string.templatelib.convert()`` (#135217) --- Lib/string/templatelib.py | 17 ++++++++++++----- Lib/test/test_string/test_templatelib.py | 22 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Lib/string/templatelib.py b/Lib/string/templatelib.py index 14b40e1e36e30b..8164872432ad09 100644 
--- a/Lib/string/templatelib.py +++ b/Lib/string/templatelib.py @@ -1,15 +1,22 @@ """Support for template string literals (t-strings).""" -__all__ = [ - "Interpolation", - "Template", -] - t = t"{0}" Template = type(t) Interpolation = type(t.interpolations[0]) del t +def convert(obj, /, conversion): + """Convert *obj* using formatted string literal semantics.""" + if conversion is None: + return obj + if conversion == 'r': + return repr(obj) + if conversion == 's': + return str(obj) + if conversion == 'a': + return ascii(obj) + raise ValueError(f'invalid conversion specifier: {conversion}') + def _template_unpickle(*args): import itertools diff --git a/Lib/test/test_string/test_templatelib.py b/Lib/test/test_string/test_templatelib.py index adaf590e64dad6..1c86717155fd5a 100644 --- a/Lib/test/test_string/test_templatelib.py +++ b/Lib/test/test_string/test_templatelib.py @@ -1,7 +1,7 @@ import pickle import unittest from collections.abc import Iterator, Iterable -from string.templatelib import Template, Interpolation +from string.templatelib import Template, Interpolation, convert from test.test_string._support import TStringBaseCase, fstring @@ -169,5 +169,25 @@ def test_exhausted(self): self.assertRaises(StopIteration, next, template_iter) +class TestFunctions(unittest.TestCase): + def test_convert(self): + from fractions import Fraction + + for obj in ('Café', None, 3.14, Fraction(1, 2)): + with self.subTest(f'{obj=}'): + self.assertEqual(convert(obj, None), obj) + self.assertEqual(convert(obj, 's'), str(obj)) + self.assertEqual(convert(obj, 'r'), repr(obj)) + self.assertEqual(convert(obj, 'a'), ascii(obj)) + + # Invalid conversion specifier + with self.assertRaises(ValueError): + convert(obj, 'z') + with self.assertRaises(ValueError): + convert(obj, 1) + with self.assertRaises(ValueError): + convert(obj, object()) + + if __name__ == '__main__': unittest.main() From a02cf19deed353d1e0e7564468f10aced61c12e8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 15 Jul 2025 14:03:21 +0200 Subject: [PATCH 3/8] gh-72570: mention the incompatibility of XOFs with HMAC (#136676) --- Doc/library/hmac.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst index d6692033b2d4c3..57076c38086c79 100644 --- a/Doc/library/hmac.rst +++ b/Doc/library/hmac.rst @@ -12,6 +12,9 @@ -------------- This module implements the HMAC algorithm as described by :rfc:`2104`. +The interface allows the use of any hash function with a *fixed* digest size. +In particular, extendable output functions such as SHAKE-128 or SHAKE-256 +cannot be used with HMAC. .. function:: new(key, msg=None, digestmod) From 624bf52c83abcb1f948f9059e29729fa94d38086 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Tue, 15 Jul 2025 14:26:24 +0200 Subject: [PATCH 4/8] gh-136155: Docs: check for EPUB fatal errors in CI (#134074) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- .github/workflows/reusable-docs.yml | 14 ++++++++++- Doc/conf.py | 1 + Doc/tools/check-epub.py | 24 +++++++++++++++++++ ...-07-01-23-00-58.gh-issue-136155.4siQQO.rst | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Doc/tools/check-epub.py create mode 100644 Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 657e0a6bf662f7..7b9dc4818577eb 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -66,7 +66,7 @@ jobs: run: | set -Eeuo pipefail # Build docs with the nit-picky option; write warnings to file - make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --fail-on-warning --warning-file sphinx-warnings.txt" html + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --warning-file sphinx-warnings.txt" html - name: 'Check warnings' if: 
github.event_name == 'pull_request' run: | @@ -75,6 +75,18 @@ jobs: --fail-if-regression \ --fail-if-improved \ --fail-if-new-news-nit + - name: 'Build EPUB documentation' + continue-on-error: true + run: | + set -Eeuo pipefail + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet" epub + pip install epubcheck + epubcheck Doc/build/epub/Python.epub &> Doc/epubcheck.txt + - name: 'Check for fatal errors in EPUB' + if: github.event_name == 'pull_request' + continue-on-error: true # until gh-136155 is fixed + run: | + python Doc/tools/check-epub.py # Run "doctest" on HEAD as new syntax doesn't exist in the latest stable release doctest: diff --git a/Doc/conf.py b/Doc/conf.py index c1ed94d7b46ec2..1c1f36e5bc0737 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -448,6 +448,7 @@ epub_author = 'Python Documentation Authors' epub_publisher = 'Python Software Foundation' +epub_exclude_files = ('index.xhtml', 'download.xhtml') # index pages are not valid xhtml # https://github.com/sphinx-doc/sphinx/issues/12359 diff --git a/Doc/tools/check-epub.py b/Doc/tools/check-epub.py new file mode 100644 index 00000000000000..693dc239c8ad58 --- /dev/null +++ b/Doc/tools/check-epub.py @@ -0,0 +1,24 @@ +import sys +from pathlib import Path + + +def main() -> int: + wrong_directory_msg = "Must run this script from the repo root" + if not Path("Doc").exists() or not Path("Doc").is_dir(): + raise RuntimeError(wrong_directory_msg) + + with Path("Doc/epubcheck.txt").open(encoding="UTF-8") as f: + messages = [message.split(" - ") for message in f.read().splitlines()] + + fatal_errors = [message for message in messages if message[0] == "FATAL"] + + if fatal_errors: + print("\nError: must not contain fatal errors:\n") + for error in fatal_errors: + print(" - ".join(error)) + + return len(fatal_errors) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst 
b/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst new file mode 100644 index 00000000000000..70f54936c80f55 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst @@ -0,0 +1 @@ +We are now checking for fatal errors in EPUB builds in CI. From 7e10a103dfe52feb0ef3d541e08abc2640838101 Mon Sep 17 00:00:00 2001 From: Ran Benita Date: Tue, 15 Jul 2025 15:49:11 +0300 Subject: [PATCH 5/8] gh-136682: Remove incorrect statement that `os.path.samestat` accepts file-like objects (#136683) --- Doc/library/os.path.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 1c1cf07a655ae7..abb0131d7d058e 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -508,9 +508,6 @@ the :mod:`glob` module.) .. versionchanged:: 3.4 Added Windows support. - .. versionchanged:: 3.6 - Accepts a :term:`path-like object`. - .. function:: split(path) From a8f42e6e884e7d63d5d63a817bc490f3bbbdba17 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Tue, 15 Jul 2025 19:15:11 +0530 Subject: [PATCH 6/8] gh-111968: remove redundant fetching of interpreter state in `dict` implementation (#136673) --- Objects/dictobject.c | 104 ++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index be62ae5eefd00d..0ed52ac5e87b6e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -217,7 +217,7 @@ set_values(PyDictObject *mp, PyDictValues *values) #define LOAD_KEYS_NENTRIES(keys) _Py_atomic_load_ssize_relaxed(&keys->dk_nentries) #define INCREF_KEYS_FT(dk) dictkeys_incref(dk) -#define DECREF_KEYS_FT(dk, shared) dictkeys_decref(_PyInterpreterState_GET(), dk, shared) +#define DECREF_KEYS_FT(dk, shared) dictkeys_decref(dk, shared) static inline void split_keys_entry_added(PyDictKeysObject *keys) { @@ -380,8 +380,7 @@ equally good collision statistics, needed less code & used 
less memory. */ -static int dictresize(PyInterpreterState *interp, PyDictObject *mp, - uint8_t log_newsize, int unicode); +static int dictresize(PyDictObject *mp, uint8_t log_newsize, int unicode); static PyObject* dict_iter(PyObject *dict); @@ -444,7 +443,7 @@ dictkeys_incref(PyDictKeysObject *dk) } static inline void -dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) +dictkeys_decref(PyDictKeysObject *dk, bool use_qsbr) { if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); @@ -753,7 +752,7 @@ _PyDict_CheckConsistency(PyObject *op, int check_content) static PyDictKeysObject* -new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) +new_keys_object(uint8_t log2_size, bool unicode) { Py_ssize_t usable; int log2_bytes; @@ -867,8 +866,7 @@ free_values(PyDictValues *values, bool use_qsbr) /* Consumes a reference to the keys object */ static PyObject * -new_dict(PyInterpreterState *interp, - PyDictKeysObject *keys, PyDictValues *values, +new_dict(PyDictKeysObject *keys, PyDictValues *values, Py_ssize_t used, int free_values_on_failure) { assert(keys != NULL); @@ -876,7 +874,7 @@ new_dict(PyInterpreterState *interp, if (mp == NULL) { mp = PyObject_GC_New(PyDictObject, &PyDict_Type); if (mp == NULL) { - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); if (free_values_on_failure) { free_values(values, false); } @@ -894,7 +892,7 @@ new_dict(PyInterpreterState *interp, } static PyObject * -new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) +new_dict_with_shared_keys(PyDictKeysObject *keys) { size_t size = shared_keys_usable_size(keys); PyDictValues *values = new_values(size); @@ -905,7 +903,7 @@ new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) for (size_t i = 0; i < size; i++) { values->values[i] = NULL; } - return new_dict(interp, keys, values, 0, 1); + 
return new_dict(keys, values, 0, 1); } @@ -971,9 +969,8 @@ clone_combined_dict_keys(PyDictObject *orig) PyObject * PyDict_New(void) { - PyInterpreterState *interp = _PyInterpreterState_GET(); /* We don't incref Py_EMPTY_KEYS here because it is immortal. */ - return new_dict(interp, Py_EMPTY_KEYS, NULL, 0, 0); + return new_dict(Py_EMPTY_KEYS, NULL, 0, 0); } /* Search index of hash table from offset of entry table */ @@ -1714,9 +1711,9 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash) } static int -insertion_resize(PyInterpreterState *interp, PyDictObject *mp, int unicode) +insertion_resize(PyDictObject *mp, int unicode) { - return dictresize(interp, mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); + return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); } static inline int @@ -1725,7 +1722,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, { if (mp->ma_keys->dk_usable <= 0) { /* Need to resize. */ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { return -1; } } @@ -1823,7 +1820,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, ASSERT_DICT_LOCKED(mp); if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { - if (insertion_resize(interp, mp, 0) < 0) + if (insertion_resize(mp, 0) < 0) goto Fail; assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } @@ -1838,7 +1835,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, } /* No space in shared keys. Resize and continue below. 
*/ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { goto Fail; } } @@ -1893,8 +1890,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, ASSERT_DICT_LOCKED(mp); int unicode = PyUnicode_CheckExact(key); - PyDictKeysObject *newkeys = new_keys_object( - interp, PyDict_LOG_MINSIZE, unicode); + PyDictKeysObject *newkeys = new_keys_object(PyDict_LOG_MINSIZE, unicode); if (newkeys == NULL) { Py_DECREF(key); Py_DECREF(value); @@ -1989,7 +1985,7 @@ This function supports: - Generic -> Generic */ static int -dictresize(PyInterpreterState *interp, PyDictObject *mp, +dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode) { PyDictKeysObject *oldkeys, *newkeys; @@ -2017,7 +2013,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, */ /* Allocate a new table. */ - newkeys = new_keys_object(interp, log2_newsize, unicode); + newkeys = new_keys_object(log2_newsize, unicode); if (newkeys == NULL) { return -1; } @@ -2060,7 +2056,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } UNLOCK_KEYS(oldkeys); set_keys(mp, newkeys); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(mp)); set_values(mp, NULL); if (oldvalues->embedded) { assert(oldvalues->embedded == 1); @@ -2141,7 +2137,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } static PyObject * -dict_new_presized(PyInterpreterState *interp, Py_ssize_t minused, bool unicode) +dict_new_presized(Py_ssize_t minused, bool unicode) { const uint8_t log2_max_presize = 17; const Py_ssize_t max_presize = ((Py_ssize_t)1) << log2_max_presize; @@ -2162,17 +2158,16 @@ dict_new_presized(PyInterpreterState *interp, Py_ssize_t minused, bool unicode) log2_newsize = estimate_log2_keysize(minused); } - new_keys = new_keys_object(interp, log2_newsize, unicode); + new_keys = new_keys_object(log2_newsize, unicode); if (new_keys == NULL) return NULL; - return new_dict(interp, new_keys, NULL, 0, 0); + return 
new_dict(new_keys, NULL, 0, 0); } PyObject * _PyDict_NewPresized(Py_ssize_t minused) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - return dict_new_presized(interp, minused, false); + return dict_new_presized(minused, false); } PyObject * @@ -2182,7 +2177,6 @@ _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, { bool unicode = true; PyObject *const *ks = keys; - PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = 0; i < length; i++) { if (!PyUnicode_CheckExact(*ks)) { @@ -2192,7 +2186,7 @@ _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, ks += keys_offset; } - PyObject *dict = dict_new_presized(interp, length, unicode); + PyObject *dict = dict_new_presized(length, unicode); if (dict == NULL) { return NULL; } @@ -2895,7 +2889,7 @@ clear_lock_held(PyObject *op) if (oldvalues == NULL) { set_keys(mp, Py_EMPTY_KEYS); assert(oldkeys->dk_refcnt == 1); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(mp)); } else { n = oldkeys->dk_nentries; @@ -2909,7 +2903,7 @@ clear_lock_held(PyObject *op) set_values(mp, NULL); set_keys(mp, Py_EMPTY_KEYS); free_values(oldvalues, IS_DICT_SHARED(mp)); - dictkeys_decref(interp, oldkeys, false); + dictkeys_decref(oldkeys, false); } } ASSERT_CONSISTENT(mp); @@ -3161,7 +3155,7 @@ dict_dict_fromkeys(PyInterpreterState *interp, PyDictObject *mp, uint8_t new_size = Py_MAX( estimate_log2_keysize(PyDict_GET_SIZE(iterable)), DK_LOG_SIZE(mp->ma_keys)); - if (dictresize(interp, mp, new_size, unicode)) { + if (dictresize(mp, new_size, unicode)) { Py_DECREF(mp); return NULL; } @@ -3186,7 +3180,7 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, uint8_t new_size = Py_MAX( estimate_log2_keysize(PySet_GET_SIZE(iterable)), DK_LOG_SIZE(mp->ma_keys)); - if (dictresize(interp, mp, new_size, 0)) { + if (dictresize(mp, new_size, 0)) { Py_DECREF(mp); return NULL; } @@ -3298,11 +3292,11 @@ dict_dealloc(PyObject *self) } 
free_values(values, false); } - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } else if (keys != NULL) { assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS); - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } if (Py_IS_TYPE(mp, &PyDict_Type)) { _Py_FREELIST_FREE(dicts, mp, Py_TYPE(mp)->tp_free); @@ -3832,7 +3826,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe return -1; ensure_shared_on_resize(mp); - dictkeys_decref(interp, mp->ma_keys, IS_DICT_SHARED(mp)); + dictkeys_decref(mp->ma_keys, IS_DICT_SHARED(mp)); set_keys(mp, keys); STORE_USED(mp, other->ma_used); ASSERT_CONSISTENT(mp); @@ -3851,8 +3845,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe */ if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) { int unicode = DK_IS_UNICODE(other->ma_keys); - if (dictresize(interp, mp, - estimate_log2_keysize(mp->ma_used + other->ma_used), + if (dictresize(mp, estimate_log2_keysize(mp->ma_used + other->ma_used), unicode)) { return -1; } @@ -4117,7 +4110,7 @@ copy_lock_held(PyObject *o) if (keys == NULL) { return NULL; } - PyDictObject *new = (PyDictObject *)new_dict(interp, keys, NULL, 0, 0); + PyDictObject *new = (PyDictObject *)new_dict(keys, NULL, 0, 0); if (new == NULL) { /* In case of an error, `new_dict()` takes care of cleaning up `keys`. */ @@ -4362,7 +4355,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } if (!PyUnicode_CheckExact(key) && DK_IS_UNICODE(mp->ma_keys)) { - if (insertion_resize(interp, mp, 0) < 0) { + if (insertion_resize(mp, 0) < 0) { if (result) { *result = NULL; } @@ -4386,7 +4379,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } /* No space in shared keys. Resize and continue below. 
*/ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { goto error; } } @@ -4555,7 +4548,7 @@ dict_popitem_impl(PyDictObject *self) } /* Convert split table to combined table */ if (_PyDict_HasSplitTable(self)) { - if (dictresize(interp, self, DK_LOG_SIZE(self->ma_keys), 1) < 0) { + if (dictresize(self, DK_LOG_SIZE(self->ma_keys), 1) < 0) { Py_DECREF(res); return NULL; } @@ -6725,10 +6718,7 @@ dictvalues_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) PyDictKeysObject * _PyDict_NewKeysForClass(PyHeapTypeObject *cls) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - - PyDictKeysObject *keys = new_keys_object( - interp, NEXT_LOG2_SHARED_KEYS_MAX_SIZE, 1); + PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE, 1); if (keys == NULL) { PyErr_Clear(); } @@ -6792,8 +6782,7 @@ _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp) } static PyDictObject * -make_dict_from_instance_attributes(PyInterpreterState *interp, - PyDictKeysObject *keys, PyDictValues *values) +make_dict_from_instance_attributes(PyDictKeysObject *keys, PyDictValues *values) { dictkeys_incref(keys); Py_ssize_t used = 0; @@ -6804,7 +6793,7 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, used += 1; } } - PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0); + PyDictObject *res = (PyDictObject *)new_dict(keys, values, used, 0); return res; } @@ -6818,9 +6807,8 @@ _PyObject_MaterializeManagedDict_LockHeld(PyObject *obj) PyDictValues *values = _PyObject_InlineValues(obj); PyDictObject *dict; if (values->valid) { - PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); - dict = make_dict_from_instance_attributes(interp, keys, values); + dict = make_dict_from_instance_attributes(keys, values); } else { dict = (PyDictObject *)PyDict_New(); @@ -6916,7 +6904,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, if (dict == NULL) { 
// Make the dict but don't publish it in the object // so that no one else will see it. - dict = make_dict_from_instance_attributes(PyInterpreterState_Get(), keys, values); + dict = make_dict_from_instance_attributes(keys, values); if (dict == NULL || _PyDict_SetItem_LockHeld(dict, name, value) < 0) { Py_XDECREF(dict); @@ -7449,11 +7437,10 @@ PyObject_ClearManagedDict(PyObject *obj) "clearing an object managed dict"); /* Clear the dict */ Py_BEGIN_CRITICAL_SECTION(dict); - PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *oldkeys = dict->ma_keys; set_keys(dict, Py_EMPTY_KEYS); dict->ma_values = NULL; - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(dict)); STORE_USED(dict, 0); clear_inline_values(_PyObject_InlineValues(obj)); Py_END_CRITICAL_SECTION(); @@ -7490,8 +7477,7 @@ ensure_managed_dict(PyObject *obj) goto done; } #endif - dict = (PyDictObject *)new_dict_with_shared_keys(_PyInterpreterState_GET(), - CACHED_KEYS(tp)); + dict = (PyDictObject *)new_dict_with_shared_keys(CACHED_KEYS(tp)); FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)dict); @@ -7520,9 +7506,8 @@ ensure_nonmanaged_dict(PyObject *obj, PyObject **dictptr) #endif PyTypeObject *tp = Py_TYPE(obj); if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) { - PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)); - dict = new_dict_with_shared_keys(interp, cached); + dict = new_dict_with_shared_keys(cached); } else { dict = PyDict_New(); @@ -7578,8 +7563,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject *obj, PyObject **dictptr, void _PyDictKeys_DecRef(PyDictKeysObject *keys) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } static inline uint32_t From be02e68158aee4d70f15baa1d8329df2c35a57f2 Mon Sep 17 00:00:00 2001 From: 
Richard Si Date: Tue, 15 Jul 2025 10:25:07 -0400 Subject: [PATCH 7/8] gh-72327: Suggest using system terminal for pip install in PyREPL (#136328) Users new to Python packaging often try to use pip from the REPL only to be met with a confusing SyntaxError. If this happens, guide the user to use a system terminal instead to invoke pip. Closes #72327 --------- Co-authored-by: Tom Viner Co-authored-by: Brian Schubert Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Alyssa Coghlan --- Lib/_pyrepl/console.py | 15 ++++++++++++++- Lib/test/test_pyrepl/test_pyrepl.py | 11 +++++++++++ Misc/ACKS | 2 ++ .../2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst | 2 ++ 4 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 8956fb1242e52a..e0535d50396316 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -27,6 +27,7 @@ import linecache from dataclasses import dataclass, field import os.path +import re import sys @@ -195,7 +196,19 @@ def runsource(self, source, filename="", symbol="single"): ast.PyCF_ONLY_AST, incomplete_input=False, ) - except (SyntaxError, OverflowError, ValueError): + except SyntaxError as e: + # If it looks like pip install was entered (a common beginner + # mistake), provide a hint to use the system command prompt. + if re.match(r"^\s*(pip3?|py(thon3?)? -m pip) install.*", source): + e.add_note( + "The Python package manager (pip) can only be used" + " outside of the Python REPL.\n" + "Try the 'pip' command in a separate terminal or" + " command prompt." 
+ ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False if tree.body: diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 98bae7dd703fd9..657a971f8769df 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1757,3 +1757,14 @@ def test_showrefcount(self): output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) self.assertEqual(len(matches), 3) + + def test_detect_pip_usage_in_repl(self): + for pip_cmd in ("pip", "pip3", "python -m pip", "python3 -m pip"): + with self.subTest(pip_cmd=pip_cmd): + output, exit_code = self.run_repl([f"{pip_cmd} install sampleproject", "exit"]) + self.assertIn("SyntaxError", output) + hint = ( + "The Python package manager (pip) can only be used" + " outside of the Python REPL" + ) + self.assertIn(hint, output) diff --git a/Misc/ACKS b/Misc/ACKS index 3814509aea030a..fabd79b9f74210 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1744,6 +1744,7 @@ Joel Shprentz Yue Shuaijie Jaysinh Shukla Terrel Shumway +Richard Si Eric Siegerman Reilly Tucker Siemens Paul Sijben @@ -1988,6 +1989,7 @@ Olivier Vielpeau Kannan Vijayan Kurt Vile Norman Vine +Tom Viner Pauli Virtanen Frank Visser Long Vo diff --git a/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst b/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst new file mode 100644 index 00000000000000..f305abb655a6f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst @@ -0,0 +1,2 @@ +Suggest using the system command prompt when ``pip install`` is typed into +the REPL. Patch by Tom Viner, Richard Si, and Brian Schubert. 
From 2500eb96b260b05387d4ab1063fcfafebf37f1a4 Mon Sep 17 00:00:00 2001 From: andrewreds Date: Wed, 16 Jul 2025 01:26:16 +1000 Subject: [PATCH 8/8] gh-135909: Assert incoming `refcnt != 0` for the free threaded GC (GH-136009) This helps catch double deallocation bugs and is similar to the assertion in the GIL-enabled build. The call to `validate_refcounts` is moved up to the start of the GC because `queue_untracked_obj_decref()` creates its own zero reference count garbage. --- Python/gc_free_threading.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d46598b23b3b2f..0b0ddf227e4952 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1073,6 +1073,14 @@ validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } + // This assert mirrors the one in Python/gc.c:update_refs(). There must be + // no tracked objects with a reference count of 0 when the cyclic + // collector starts. If there is, then the collector will double dealloc + // the object. The likely cause for hitting this is a faulty .tp_dealloc. + // Also see the comment in `update_refs()`. + _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0, + "tracked objects must have a reference count > 0"); + _PyObject_ASSERT_WITH_MSG(op, !gc_is_unreachable(op), "object should not be marked as unreachable yet"); @@ -1422,13 +1430,6 @@ static int deduce_unreachable_heap(PyInterpreterState *interp, struct collection_state *state) { - -#ifdef GC_DEBUG - // Check that all objects are marked as unreachable and that the computed - // reference count difference (stored in `ob_tid`) is non-negative. - gc_visit_heaps(interp, &validate_refcounts, &state->base); -#endif - // Identify objects that are directly reachable from outside the GC heap // by computing the difference between the refcount and the number of // incoming references.
@@ -2158,6 +2159,13 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } +#ifdef GC_DEBUG + // Before we start, check that the heap is in a good condition. There must + // be no objects with a zero reference count. And `ob_tid` must only have a + // thread if the refcount is unmerged. + gc_visit_heaps(interp, &validate_refcounts, &state->base); +#endif + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;