From dbe3950a76cce176c6c185b873f9552503d87043 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Thu, 30 Oct 2025 10:18:12 +0000
Subject: [PATCH 01/13] gh-129117: Add unicodedata.isxidstart() function
 (#140269)

Expose `_PyUnicode_IsXidContinue/Start` in `unicodedata`:
add isxidstart() and isxidcontinue() functions.

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Doc/library/unicodedata.rst                   | 30 ++++++++
 Doc/whatsnew/3.15.rst                         |  5 ++
 Include/internal/pycore_unicodectype.h        | 25 +++++++
 Include/internal/pycore_unicodeobject.h       | 12 ---
 Lib/test/test_unicodedata.py                  | 27 +++++++
 Makefile.pre.in                               |  1 +
 ...-10-17-20-42-38.gh-issue-129117.X9jr4p.rst |  3 +
 Modules/clinic/unicodedata.c.h                | 74 ++++++++++++++++++-
 Modules/unicodedata.c                         | 55 ++++++++++++++
 Objects/unicodectype.c                        |  1 +
 Objects/unicodeobject.c                       |  1 +
 PCbuild/pythoncore.vcxproj                    |  1 +
 PCbuild/pythoncore.vcxproj.filters            |  3 +
 13 files changed, 225 insertions(+), 13 deletions(-)
 create mode 100644 Include/internal/pycore_unicodectype.h
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-17-20-42-38.gh-issue-129117.X9jr4p.rst

diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
index 0369cd99c47c18..c49bf641704616 100644
--- a/Doc/library/unicodedata.rst
+++ b/Doc/library/unicodedata.rst
@@ -144,6 +144,36 @@ following functions:
       1
 
 
+.. function:: isxidstart(chr, /)
+
+   Return ``True`` if *chr* is a valid identifier start per the
+   `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_,
+   that is, it has the ``XID_Start`` property. Return ``False`` otherwise.
+   For example::
+
+      >>> unicodedata.isxidstart('S')
+      True
+      >>> unicodedata.isxidstart('0')
+      False
+
+   .. versionadded:: next
+
+
+.. function:: isxidcontinue(chr, /)
+
+   Return ``True`` if *chr* is a valid identifier character per the
+   `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_,
+   that is, it has the ``XID_Continue`` property. Return ``False`` otherwise.
+   For example::
+
+      >>> unicodedata.isxidcontinue('S')
+      True
+      >>> unicodedata.isxidcontinue(' ')
+      False
+
+   .. versionadded:: next
+
+
 .. function:: decomposition(chr)
 
    Returns the character decomposition mapping assigned to the character
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 85b4c12544a0c9..fe9adfe9f730ec 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -794,6 +794,11 @@ unicodedata
 
 * The Unicode database has been updated to Unicode 17.0.0.
 
+* Add :func:`unicodedata.isxidstart` and :func:`unicodedata.isxidcontinue`
+  functions to check whether a character can start or continue a
+  `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_ identifier.
+  (Contributed by Stan Ulbrych in :gh:`129117`.)
+
 
 wave
 ----
diff --git a/Include/internal/pycore_unicodectype.h b/Include/internal/pycore_unicodectype.h
new file mode 100644
index 00000000000000..523bdb56b09cde
--- /dev/null
+++ b/Include/internal/pycore_unicodectype.h
@@ -0,0 +1,25 @@
+#ifndef Py_INTERNAL_UNICODECTYPE_H
+#define Py_INTERNAL_UNICODECTYPE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
+extern int _PyUnicode_IsCased(Py_UCS4 ch);
+
+// Export for 'unicodedata' shared extension.
+PyAPI_FUNC(int) _PyUnicode_IsXidStart(Py_UCS4 ch);
+PyAPI_FUNC(int) _PyUnicode_IsXidContinue(Py_UCS4 ch);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_UNICODECTYPE_H */
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index b83039c1869f23..f384fad8713adc 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -74,18 +74,6 @@ _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
     return 0;
 }
 
-
-/* --- Characters Type APIs ----------------------------------------------- */
-
-extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
-extern int _PyUnicode_IsXidContinue(Py_UCS4 ch);
-extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
-extern int _PyUnicode_IsCased(Py_UCS4 ch);
-
 /* --- Unicode API -------------------------------------------------------- */
 
 // Export for '_json' shared extension
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 8013eaf6e9d851..a3c22a4f27ee77 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -276,6 +276,33 @@ def test_east_asian_width_9_0_changes(self):
         self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
         self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
 
+    def test_isxidstart(self):
+        self.assertTrue(self.db.isxidstart('S'))
+        self.assertTrue(self.db.isxidstart('\u0AD0'))  # GUJARATI OM
+        self.assertTrue(self.db.isxidstart('\u0EC6'))  # LAO KO LA
+        self.assertTrue(self.db.isxidstart('\u17DC'))  # KHMER SIGN AVAKRAHASANYA
+        self.assertTrue(self.db.isxidstart('\uA015'))  # YI SYLLABLE WU
+        self.assertTrue(self.db.isxidstart('\uFE7B'))  # ARABIC KASRA MEDIAL FORM
+
+        self.assertFalse(self.db.isxidstart(' '))
+        self.assertFalse(self.db.isxidstart('0'))
+        self.assertRaises(TypeError, self.db.isxidstart)
+        self.assertRaises(TypeError, self.db.isxidstart, 'xx')
+
+    def test_isxidcontinue(self):
+        self.assertTrue(self.db.isxidcontinue('S'))
+        self.assertTrue(self.db.isxidcontinue('_'))
+        self.assertTrue(self.db.isxidcontinue('0'))
+        self.assertTrue(self.db.isxidcontinue('\u00BA'))  # MASCULINE ORDINAL INDICATOR
+        self.assertTrue(self.db.isxidcontinue('\u0640'))  # ARABIC TATWEEL
+        self.assertTrue(self.db.isxidcontinue('\u0710'))  # SYRIAC LETTER ALAPH
+        self.assertTrue(self.db.isxidcontinue('\u0B3E'))  # ORIYA VOWEL SIGN AA
+        self.assertTrue(self.db.isxidcontinue('\u17D7'))  # KHMER SIGN LEK TOO
+
+        self.assertFalse(self.db.isxidcontinue(' '))
+        self.assertRaises(TypeError, self.db.isxidcontinue)
+        self.assertRaises(TypeError, self.db.isxidcontinue, 'xx')
+
 class UnicodeMiscTest(UnicodeDatabaseTest):
 
     @cpython_only
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 19423c11545c19..0a1b8d028addad 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1433,6 +1433,7 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_typeobject.h \
 		$(srcdir)/Include/internal/pycore_typevarobject.h \
 		$(srcdir)/Include/internal/pycore_ucnhash.h \
+		$(srcdir)/Include/internal/pycore_unicodectype.h \
 		$(srcdir)/Include/internal/pycore_unicodeobject.h \
 		$(srcdir)/Include/internal/pycore_unicodeobject_generated.h \
 		$(srcdir)/Include/internal/pycore_unionobject.h \
diff --git a/Misc/NEWS.d/next/Library/2025-10-17-20-42-38.gh-issue-129117.X9jr4p.rst b/Misc/NEWS.d/next/Library/2025-10-17-20-42-38.gh-issue-129117.X9jr4p.rst
new file mode 100644
index 00000000000000..8767b1bb4837ad
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-17-20-42-38.gh-issue-129117.X9jr4p.rst
@@ -0,0 +1,3 @@
+:mod:`unicodedata`: Add :func:`~unicodedata.isxidstart` and
+:func:`~unicodedata.isxidcontinue` functions to check whether a character can
+start or continue a `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_ identifier.
diff --git a/Modules/clinic/unicodedata.c.h b/Modules/clinic/unicodedata.c.h
index 345440eeee89a6..5fcba083c2f4ce 100644
--- a/Modules/clinic/unicodedata.c.h
+++ b/Modules/clinic/unicodedata.c.h
@@ -518,6 +518,78 @@ unicodedata_UCD_name(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(unicodedata_UCD_isxidstart__doc__,
+"isxidstart($self, chr, /)\n"
+"--\n"
+"\n"
+"Return True if the character has the XID_Start property, else False.");
+
+#define UNICODEDATA_UCD_ISXIDSTART_METHODDEF    \
+    {"isxidstart", (PyCFunction)unicodedata_UCD_isxidstart, METH_O, unicodedata_UCD_isxidstart__doc__},
+
+static PyObject *
+unicodedata_UCD_isxidstart_impl(PyObject *self, int chr);
+
+static PyObject *
+unicodedata_UCD_isxidstart(PyObject *self, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    int chr;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("isxidstart", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        PyErr_Format(PyExc_TypeError,
+            "isxidstart(): argument must be a unicode character, "
+            "not a string of length %zd",
+            PyUnicode_GET_LENGTH(arg));
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(arg, 0);
+    return_value = unicodedata_UCD_isxidstart_impl(self, chr);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(unicodedata_UCD_isxidcontinue__doc__,
+"isxidcontinue($self, chr, /)\n"
+"--\n"
+"\n"
+"Return True if the character has the XID_Continue property, else False.");
+
+#define UNICODEDATA_UCD_ISXIDCONTINUE_METHODDEF    \
+    {"isxidcontinue", (PyCFunction)unicodedata_UCD_isxidcontinue, METH_O, unicodedata_UCD_isxidcontinue__doc__},
+
+static PyObject *
+unicodedata_UCD_isxidcontinue_impl(PyObject *self, int chr);
+
+static PyObject *
+unicodedata_UCD_isxidcontinue(PyObject *self, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    int chr;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("isxidcontinue", "argument", "a unicode character", arg);
+        goto exit;
+    }
+    if (PyUnicode_GET_LENGTH(arg) != 1) {
+        PyErr_Format(PyExc_TypeError,
+            "isxidcontinue(): argument must be a unicode character, "
+            "not a string of length %zd",
+            PyUnicode_GET_LENGTH(arg));
+        goto exit;
+    }
+    chr = PyUnicode_READ_CHAR(arg, 0);
+    return_value = unicodedata_UCD_isxidcontinue_impl(self, chr);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(unicodedata_UCD_lookup__doc__,
 "lookup($self, name, /)\n"
 "--\n"
@@ -549,4 +621,4 @@ unicodedata_UCD_lookup(PyObject *self, PyObject *arg)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=8a59d430cee41058 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c5e56c8f6bb80f93 input=a9049054013a1b77]*/
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index a3699beff7da01..a6094676d4194c 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -19,6 +19,7 @@
 #include "Python.h"
 #include "pycore_object.h"        // _PyObject_VisitType()
 #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
+#include "pycore_unicodectype.h"  // _PyUnicode_IsXidStart()
 
 #include <stdbool.h>
 #include <stddef.h>               // offsetof()
@@ -1525,6 +1526,58 @@ unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
     return PyUnicode_FromString(name);
 }
 
+/*[clinic input]
+unicodedata.UCD.isxidstart
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Return True if the character has the XID_Start property, else False.
+
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_isxidstart_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=944005823c72c3ef input=9353f88d709c21fb]*/
+{
+    if (UCD_Check(self)) {
+        const change_record *old = get_old_record(self, chr);
+        if (old->category_changed == 0) {
+            /* unassigned */
+            Py_RETURN_FALSE;
+        }
+    }
+
+    return PyBool_FromLong(_PyUnicode_IsXidStart(chr));
+}
+
+/*[clinic input]
+unicodedata.UCD.isxidcontinue
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Return True if the character has the XID_Continue property, else False.
+
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_isxidcontinue_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=9438dcbff5ca3e41 input=bbb8dd3ac0d2d709]*/
+{
+    if (UCD_Check(self)) {
+        const change_record *old = get_old_record(self, chr);
+        if (old->category_changed == 0) {
+            /* unassigned */
+            Py_RETURN_FALSE;
+        }
+    }
+
+    return PyBool_FromLong(_PyUnicode_IsXidContinue(chr));
+}
+
 /*[clinic input]
 unicodedata.UCD.lookup
 
@@ -1590,6 +1643,8 @@ static PyMethodDef unicodedata_functions[] = {
     UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF
     UNICODEDATA_UCD_DECOMPOSITION_METHODDEF
     UNICODEDATA_UCD_NAME_METHODDEF
+    UNICODEDATA_UCD_ISXIDSTART_METHODDEF
+    UNICODEDATA_UCD_ISXIDCONTINUE_METHODDEF
     UNICODEDATA_UCD_LOOKUP_METHODDEF
     UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF
     UNICODEDATA_UCD_NORMALIZE_METHODDEF
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 7cd0dca3d13545..fdd380190ac1ec 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -9,6 +9,7 @@
 */
 
 #include "Python.h"
+#include "pycore_unicodectype.h"   // export _PyUnicode_IsXidStart(), _PyUnicode_IsXidContinue()
 
 #define ALPHA_MASK 0x01
 #define DECIMAL_MASK 0x02
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f60f7dd2d13604..8a5638ac1406ab 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -57,6 +57,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include "pycore_pylifecycle.h"   // _Py_SetFileSystemEncoding()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
+#include "pycore_unicodectype.h"  // _PyUnicode_IsXidStart
 #include "pycore_unicodeobject.h" // struct _Py_unicode_state
 #include "pycore_unicodeobject_generated.h"  // _PyUnicode_InitStaticStrings()
 
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 2657ee5c444e60..a101c1b45cf25c 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -328,6 +328,7 @@
     <ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
     <ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
     <ClInclude Include="..\Include\internal\pycore_unionobject.h" />
+    <ClInclude Include="..\Include\internal\pycore_unicodectype.h" />
     <ClInclude Include="..\Include\internal\pycore_unicodeobject.h" />
     <ClInclude Include="..\Include\internal\pycore_unicodeobject_generated.h" />
     <ClInclude Include="..\Include\internal\pycore_uniqueid.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 9c12be6e9356a6..e3f261c2b92ab9 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -528,6 +528,9 @@
     <ClInclude Include="..\Include\cpython\initconfig.h">
       <Filter>Include\cpython</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\pycore_unicodectype.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_unicodeobject.h">
       <Filter>Include\internal</Filter>
     </ClInclude>

From c45e6e1bb3b3045b413574d25ebb2a473f6f5a1e Mon Sep 17 00:00:00 2001
From: Donghee Na <donghee.na@python.org>
Date: Thu, 30 Oct 2025 19:32:08 +0900
Subject: [PATCH 02/13] gh-137821: Improve Argument Clinic definitions in the
 ``_json`` module (#140780)

---
 Lib/test/test_json/test_scanstring.py         |  2 +-
 ...-10-30-15-33-07.gh-issue-137821.8_Iavt.rst |  1 -
 ...-10-30-15-33-07.gh-issue-137821.8_Iavt.rst |  2 +
 Modules/_json.c                               | 55 ++++---------------
 Modules/clinic/_json.c.h                      | 46 +++++++++++++++-
 5 files changed, 59 insertions(+), 47 deletions(-)
 delete mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst

diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py
index cca556a3b95bab..9a6cdfe12d266c 100644
--- a/Lib/test/test_json/test_scanstring.py
+++ b/Lib/test/test_json/test_scanstring.py
@@ -144,7 +144,7 @@ def test_bad_escapes(self):
 
     def test_overflow(self):
         with self.assertRaises(OverflowError):
-            self.json.decoder.scanstring(b"xxx", sys.maxsize+1)
+            self.json.decoder.scanstring("xxx", sys.maxsize+1)
 
 
 class TestPyScanstring(TestScanstring, PyTest): pass
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst
deleted file mode 100644
index 124ea3f9993814..00000000000000
--- a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst
+++ /dev/null
@@ -1 +0,0 @@
-Convert ``_json`` module to use Argument Clinic
diff --git a/Misc/NEWS.d/next/Library/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst b/Misc/NEWS.d/next/Library/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst
new file mode 100644
index 00000000000000..7ccbfc3cb950bf
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-30-15-33-07.gh-issue-137821.8_Iavt.rst
@@ -0,0 +1,2 @@
+Convert ``_json`` module to use Argument Clinic.
+Patch by Yoonho Hann.
diff --git a/Modules/_json.c b/Modules/_json.c
index 6a84661a243ea4..14714d4b346546 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -645,7 +645,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
 
 /*[clinic input]
 _json.scanstring as py_scanstring
-    pystr: object
+    pystr: unicode
     end: Py_ssize_t
     strict: bool = True
     /
@@ -664,74 +664,41 @@ after the end quote.
 static PyObject *
 py_scanstring_impl(PyObject *module, PyObject *pystr, Py_ssize_t end,
                    int strict)
-/*[clinic end generated code: output=961740cfae07cdb3 input=9d46d7df7ac749b0]*/
+/*[clinic end generated code: output=961740cfae07cdb3 input=cff59e47498f4d8e]*/
 {
-    PyObject *rval;
     Py_ssize_t next_end = -1;
-    if (PyUnicode_Check(pystr)) {
-        rval = scanstring_unicode(pystr, end, strict, &next_end);
-    }
-    else {
-        PyErr_Format(PyExc_TypeError,
-                     "first argument must be a string, not %.80s",
-                     Py_TYPE(pystr)->tp_name);
-        return NULL;
-    }
+    PyObject *rval = scanstring_unicode(pystr, end, strict, &next_end);
     return _build_rval_index_tuple(rval, next_end);
 }
 
 /*[clinic input]
 _json.encode_basestring_ascii as py_encode_basestring_ascii
-    pystr: object
+    pystr: unicode
     /
 
 Return an ASCII-only JSON representation of a Python string
 [clinic start generated code]*/
 
 static PyObject *
-py_encode_basestring_ascii(PyObject *module, PyObject *pystr)
-/*[clinic end generated code: output=a8afcd88eba0b572 input=f4085ccd5928ea55]*/
+py_encode_basestring_ascii_impl(PyObject *module, PyObject *pystr)
+/*[clinic end generated code: output=7b3841287cf211df input=4f3609498aff2de5]*/
 {
-    PyObject *rval;
-    /* Return an ASCII-only JSON representation of a Python string */
-    /* METH_O */
-    if (PyUnicode_Check(pystr)) {
-        rval = ascii_escape_unicode(pystr);
-    }
-    else {
-        PyErr_Format(PyExc_TypeError,
-                     "first argument must be a string, not %.80s",
-                     Py_TYPE(pystr)->tp_name);
-        return NULL;
-    }
-    return rval;
+    return ascii_escape_unicode(pystr);
 }
 
 /*[clinic input]
 _json.encode_basestring as py_encode_basestring
-    pystr: object
+    pystr: unicode
     /
 
 Return a JSON representation of a Python string
 [clinic start generated code]*/
 
 static PyObject *
-py_encode_basestring(PyObject *module, PyObject *pystr)
-/*[clinic end generated code: output=c87752300776d3b1 input=c3c7ef6e72624f6e]*/
+py_encode_basestring_impl(PyObject *module, PyObject *pystr)
+/*[clinic end generated code: output=900950f95df3f1c9 input=d42ef714b2c07386]*/
 {
-    PyObject *rval;
-    /* Return a JSON representation of a Python string */
-    /* METH_O */
-    if (PyUnicode_Check(pystr)) {
-        rval = escape_unicode(pystr);
-    }
-    else {
-        PyErr_Format(PyExc_TypeError,
-                     "first argument must be a string, not %.80s",
-                     Py_TYPE(pystr)->tp_name);
-        return NULL;
-    }
-    return rval;
+    return escape_unicode(pystr);
 }
 
 static void
diff --git a/Modules/clinic/_json.c.h b/Modules/clinic/_json.c.h
index b80e72ad00a62a..cd37a236c7611a 100644
--- a/Modules/clinic/_json.c.h
+++ b/Modules/clinic/_json.c.h
@@ -37,6 +37,10 @@ py_scanstring(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     if (!_PyArg_CheckPositional("scanstring", nargs, 2, 3)) {
         goto exit;
     }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("scanstring", "argument 1", "str", args[0]);
+        goto exit;
+    }
     pystr = args[0];
     {
         Py_ssize_t ival = -1;
@@ -73,6 +77,26 @@ PyDoc_STRVAR(py_encode_basestring_ascii__doc__,
 #define PY_ENCODE_BASESTRING_ASCII_METHODDEF    \
     {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, METH_O, py_encode_basestring_ascii__doc__},
 
+static PyObject *
+py_encode_basestring_ascii_impl(PyObject *module, PyObject *pystr);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    PyObject *pystr;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("encode_basestring_ascii", "argument", "str", arg);
+        goto exit;
+    }
+    pystr = arg;
+    return_value = py_encode_basestring_ascii_impl(module, pystr);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(py_encode_basestring__doc__,
 "encode_basestring($module, pystr, /)\n"
 "--\n"
@@ -81,4 +105,24 @@ PyDoc_STRVAR(py_encode_basestring__doc__,
 
 #define PY_ENCODE_BASESTRING_METHODDEF    \
     {"encode_basestring", (PyCFunction)py_encode_basestring, METH_O, py_encode_basestring__doc__},
-/*[clinic end generated code: output=d3aa505efc0acb3f input=a9049054013a1b77]*/
+
+static PyObject *
+py_encode_basestring_impl(PyObject *module, PyObject *pystr);
+
+static PyObject *
+py_encode_basestring(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    PyObject *pystr;
+
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("encode_basestring", "argument", "str", arg);
+        goto exit;
+    }
+    pystr = arg;
+    return_value = py_encode_basestring_impl(module, pystr);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=5bdd16375c95a4d9 input=a9049054013a1b77]*/

From 622d97b8bbeb9ebdaa1061adf99a8b240d715e2f Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Thu, 30 Oct 2025 10:50:16 +0000
Subject: [PATCH 03/13] gh-139198: Remove `Tools/scripts/checkpip.py` script
 (GH-139199)

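Remove the `Tools/scripts/checkpip.py` script and its entry in `Tools/scripts/README`, and add a NEWS entry.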
---
 ...-09-21-10-30-08.gh-issue-139198.Fm7NfU.rst |  1 +
 Tools/scripts/README                          |  2 --
 Tools/scripts/checkpip.py                     | 32 -------------------
 3 files changed, 1 insertion(+), 34 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-09-21-10-30-08.gh-issue-139198.Fm7NfU.rst
 delete mode 100755 Tools/scripts/checkpip.py

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-09-21-10-30-08.gh-issue-139198.Fm7NfU.rst b/Misc/NEWS.d/next/Tools-Demos/2025-09-21-10-30-08.gh-issue-139198.Fm7NfU.rst
new file mode 100644
index 00000000000000..0dc589c3986ad6
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-09-21-10-30-08.gh-issue-139198.Fm7NfU.rst
@@ -0,0 +1 @@
+Remove ``Tools/scripts/checkpip.py`` script.
diff --git a/Tools/scripts/README b/Tools/scripts/README
index a078bfbf662a37..4e52cda38e8d88 100644
--- a/Tools/scripts/README
+++ b/Tools/scripts/README
@@ -1,8 +1,6 @@
 This directory contains a collection of executable Python scripts that are
 useful while building, extending or managing Python.
 
-checkpip.py               Checks the version of the projects bundled in ensurepip
-                          are the latest available
 combinerefs.py            A helper for analyzing PYTHONDUMPREFS output
 divmod_threshold.py       Determine threshold for switching from longobject.c
                           divmod to _pylong.int_divmod()
diff --git a/Tools/scripts/checkpip.py b/Tools/scripts/checkpip.py
deleted file mode 100755
index a4a9ddfa6f324a..00000000000000
--- a/Tools/scripts/checkpip.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python3
-"""
-Checks that the version of the projects bundled in ensurepip are the latest
-versions available.
-"""
-import ensurepip
-import json
-import urllib.request
-import sys
-
-
-def main():
-    outofdate = False
-
-    for project, version in ensurepip._PROJECTS:
-        data = json.loads(urllib.request.urlopen(
-            "https://pypi.org/pypi/{}/json".format(project),
-            cadefault=True,
-        ).read().decode("utf8"))
-        upstream_version = data["info"]["version"]
-
-        if version != upstream_version:
-            outofdate = True
-            print("The latest version of {} on PyPI is {}, but ensurepip "
-                  "has {}".format(project, upstream_version, version))
-
-    if outofdate:
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()

From 327dbbedffa3f2c95e70129a11974b83e27864f9 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 30 Oct 2025 12:52:02 +0200
Subject: [PATCH 04/13] gh-138162: Fix logging.LoggerAdapter with
 merge_extra=True and without the extra argument (GH-140511)

---
 Doc/library/logging.rst                       | 15 ++++++---
 Lib/logging/__init__.py                       | 11 ++++---
 Lib/test/test_logging.py                      | 33 ++++++++++++++++++-
 ...-10-23-19-39-16.gh-issue-138162.Znw5DN.rst |  2 ++
 4 files changed, 50 insertions(+), 11 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-23-19-39-16.gh-issue-138162.Znw5DN.rst

diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst
index 425025931d9835..0cf5b1c0d9bc3e 100644
--- a/Doc/library/logging.rst
+++ b/Doc/library/logging.rst
@@ -1082,12 +1082,13 @@ LoggerAdapter Objects
 information into logging calls. For a usage example, see the section on
 :ref:`adding contextual information to your logging output <context-info>`.
 
-.. class:: LoggerAdapter(logger, extra, merge_extra=False)
+.. class:: LoggerAdapter(logger, extra=None, merge_extra=False)
 
    Returns an instance of :class:`LoggerAdapter` initialized with an
-   underlying :class:`Logger` instance, a dict-like object (*extra*), and a
-   boolean (*merge_extra*) indicating whether or not the *extra* argument of
-   individual log calls should be merged with the :class:`LoggerAdapter` extra.
+   underlying :class:`Logger` instance, an optional dict-like object (*extra*),
+   and an optional boolean (*merge_extra*) indicating whether or not
+   the *extra* argument of individual log calls should be merged with
+   the :class:`LoggerAdapter` extra.
    The default behavior is to ignore the *extra* argument of individual log
    calls and only use the one of the :class:`LoggerAdapter` instance
 
@@ -1127,9 +1128,13 @@ information into logging calls. For a usage example, see the section on
       Attribute :attr:`!manager` and method :meth:`!_log` were added, which
       delegate to the underlying logger and allow adapters to be nested.
 
+   .. versionchanged:: 3.10
+
+      The *extra* argument is now optional.
+
    .. versionchanged:: 3.13
 
-      The *merge_extra* argument was added.
+      The *merge_extra* parameter was added.
 
 
 Thread Safety
diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py
index 431ff41b352048..39689a57e6ecd6 100644
--- a/Lib/logging/__init__.py
+++ b/Lib/logging/__init__.py
@@ -1849,9 +1849,9 @@ class LoggerAdapter(object):
 
     def __init__(self, logger, extra=None, merge_extra=False):
         """
-        Initialize the adapter with a logger and a dict-like object which
-        provides contextual information. This constructor signature allows
-        easy stacking of LoggerAdapters, if so desired.
+        Initialize the adapter with a logger and an optional dict-like object
+        which provides contextual information. This constructor signature
+        allows easy stacking of LoggerAdapters, if so desired.
 
         You can effectively pass keyword arguments as shown in the
         following example:
@@ -1882,8 +1882,9 @@ def process(self, msg, kwargs):
         Normally, you'll only need to override this one method in a
         LoggerAdapter subclass for your specific needs.
         """
-        if self.merge_extra and "extra" in kwargs:
-            kwargs["extra"] = {**self.extra, **kwargs["extra"]}
+        if self.merge_extra and kwargs.get("extra") is not None:
+            if self.extra is not None:
+                kwargs["extra"] = {**self.extra, **kwargs["extra"]}
         else:
             kwargs["extra"] = self.extra
         return msg, kwargs
diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py
index 1f7a4d9e197f9c..8815426fc99c39 100644
--- a/Lib/test/test_logging.py
+++ b/Lib/test/test_logging.py
@@ -5826,7 +5826,7 @@ def cleanup():
 
         self.addCleanup(cleanup)
         self.addCleanup(logging.shutdown)
-        self.adapter = logging.LoggerAdapter(logger=self.logger, extra=None)
+        self.adapter = logging.LoggerAdapter(logger=self.logger)
 
     def test_exception(self):
         msg = 'testing exception: %r'
@@ -5997,6 +5997,18 @@ def test_extra_merged(self):
         self.assertEqual(record.foo, '1')
         self.assertEqual(record.bar, '2')
 
+        self.adapter.critical('no extra')  # should not fail
+        self.assertEqual(len(self.recording.records), 2)
+        record = self.recording.records[-1]
+        self.assertEqual(record.foo, '1')
+        self.assertNotHasAttr(record, 'bar')
+
+        self.adapter.critical('none extra', extra=None)  # should not fail
+        self.assertEqual(len(self.recording.records), 3)
+        record = self.recording.records[-1]
+        self.assertEqual(record.foo, '1')
+        self.assertNotHasAttr(record, 'bar')
+
     def test_extra_merged_log_call_has_precedence(self):
         self.adapter = logging.LoggerAdapter(logger=self.logger,
                                              extra={'foo': '1'},
@@ -6008,6 +6020,25 @@ def test_extra_merged_log_call_has_precedence(self):
         self.assertHasAttr(record, 'foo')
         self.assertEqual(record.foo, '2')
 
+    def test_extra_merged_without_extra(self):
+        self.adapter = logging.LoggerAdapter(logger=self.logger,
+                                             merge_extra=True)
+
+        self.adapter.critical('foo should be here', extra={'foo': '1'})
+        self.assertEqual(len(self.recording.records), 1)
+        record = self.recording.records[-1]
+        self.assertEqual(record.foo, '1')
+
+        self.adapter.critical('no extra')  # should not fail
+        self.assertEqual(len(self.recording.records), 2)
+        record = self.recording.records[-1]
+        self.assertNotHasAttr(record, 'foo')
+
+        self.adapter.critical('none extra', extra=None)  # should not fail
+        self.assertEqual(len(self.recording.records), 3)
+        record = self.recording.records[-1]
+        self.assertNotHasAttr(record, 'foo')
+
 
 class PrefixAdapter(logging.LoggerAdapter):
     prefix = 'Adapter'
diff --git a/Misc/NEWS.d/next/Library/2025-10-23-19-39-16.gh-issue-138162.Znw5DN.rst b/Misc/NEWS.d/next/Library/2025-10-23-19-39-16.gh-issue-138162.Znw5DN.rst
new file mode 100644
index 00000000000000..ef7a90bc37e650
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-23-19-39-16.gh-issue-138162.Znw5DN.rst
@@ -0,0 +1,2 @@
+Fix :class:`logging.LoggerAdapter` with ``merge_extra=True`` and without the
+*extra* argument.

From 2a904263aa0bc7c4a13beb4d8baa8cbc060a45ee Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Thu, 30 Oct 2025 10:54:03 +0000
Subject: [PATCH 05/13] gh-131885: Update unicodedata function signatures to
 use `/` (#140270)

---
 Doc/library/unicodedata.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
index c49bf641704616..fd5f56bd7eaaeb 100644
--- a/Doc/library/unicodedata.rst
+++ b/Doc/library/unicodedata.rst
@@ -31,7 +31,7 @@ following functions:
    this module.
 
 
-.. function:: lookup(name)
+.. function:: lookup(name, /)
 
    Look up character by name.  If a character with the given name is found, return
    the corresponding character.  If not found, :exc:`KeyError` is raised.
@@ -94,7 +94,7 @@ following functions:
       0.5
 
 
-.. function:: category(chr)
+.. function:: category(chr, /)
 
    Returns the general category assigned to the character *chr* as
    string. General category names consist of two letters.
@@ -106,7 +106,7 @@ following functions:
       'Lu'
 
 
-.. function:: bidirectional(chr)
+.. function:: bidirectional(chr, /)
 
    Returns the bidirectional class assigned to the character *chr* as
    string. If no such value is defined, an empty string is returned.
@@ -118,7 +118,7 @@ following functions:
       'AN'
 
 
-.. function:: combining(chr)
+.. function:: combining(chr, /)
 
    Returns the canonical combining class assigned to the character *chr*
    as integer. Returns ``0`` if no combining class is defined.
@@ -127,14 +127,14 @@ following functions:
    for more information.
 
 
-.. function:: east_asian_width(chr)
+.. function:: east_asian_width(chr, /)
 
    Returns the east asian width assigned to the character *chr* as
    string. For a list of widths and or more information, see the
    `Unicode Standard Annex #11 <https://www.unicode.org/reports/tr11/>`_.
 
 
-.. function:: mirrored(chr)
+.. function:: mirrored(chr, /)
 
    Returns the mirrored property assigned to the character *chr* as
    integer. Returns ``1`` if the character has been identified as a "mirrored"
@@ -174,7 +174,7 @@ following functions:
    .. versionadded:: next
 
 
-.. function:: decomposition(chr)
+.. function:: decomposition(chr, /)
 
    Returns the character decomposition mapping assigned to the character
    *chr* as string. An empty string is returned in case no such mapping is
@@ -184,7 +184,7 @@ following functions:
       '0041 0303'
 
 
-.. function:: normalize(form, unistr)
+.. function:: normalize(form, unistr, /)
 
    Return the normal form *form* for the Unicode string *unistr*. Valid values for
    *form* are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
@@ -217,7 +217,7 @@ following functions:
    doesn't, they may not compare equal.
 
 
-.. function:: is_normalized(form, unistr)
+.. function:: is_normalized(form, unistr, /)
 
    Return whether the Unicode string *unistr* is in the normal form *form*. Valid
    values for *form* are 'NFC', 'NFKC', 'NFD', and 'NFKD'.

From ad0a3f733b23e7fc69aff13055c7fac8ab9dcd66 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 30 Oct 2025 13:00:42 +0200
Subject: [PATCH 06/13] gh-131927: Do not emit PEP 765 warnings in ast.parse()
 (GH-139642)

ast.parse() no longer emits syntax warnings for
return/break/continue in finally (see PEP 765) -- they are only
emitted during compilation.
---
 Include/internal/pycore_compile.h             |  3 +-
 Lib/test/test_ast/test_ast.py                 | 55 -----------------
 Lib/test/test_compile.py                      | 60 +++++++++++++++++++
 Lib/test/test_pyrepl/test_interact.py         | 26 ++++++++
 ...10-06-10-03-37.gh-issue-139640.gY5oTb2.rst |  3 +
 Python/ast_preprocess.c                       |  8 ++-
 Python/compile.c                              |  4 +-
 7 files changed, 98 insertions(+), 61 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-10-03-37.gh-issue-139640.gY5oTb2.rst

diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
index c18e04bf67a5df..1c60834fa2058c 100644
--- a/Include/internal/pycore_compile.h
+++ b/Include/internal/pycore_compile.h
@@ -49,7 +49,8 @@ extern int _PyAST_Preprocess(
     PyObject *filename,
     int optimize,
     int ff_features,
-    int syntax_check_only);
+    int syntax_check_only,
+    int enable_warnings);
 
 
 typedef struct {
diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py
index 5fdb3a458ae999..a979a4b1da1ad1 100644
--- a/Lib/test/test_ast/test_ast.py
+++ b/Lib/test/test_ast/test_ast.py
@@ -1057,61 +1057,6 @@ def test_repr_large_input_crash(self):
                                     r"Exceeds the limit \(\d+ digits\)"):
             repr(ast.Constant(value=eval(source)))
 
-    def test_pep_765_warnings(self):
-        srcs = [
-            textwrap.dedent("""
-                 def f():
-                     try:
-                         pass
-                     finally:
-                         return 42
-                 """),
-            textwrap.dedent("""
-                 for x in y:
-                     try:
-                         pass
-                     finally:
-                         break
-                 """),
-            textwrap.dedent("""
-                 for x in y:
-                     try:
-                         pass
-                     finally:
-                         continue
-                 """),
-        ]
-        for src in srcs:
-            with self.assertWarnsRegex(SyntaxWarning, 'finally'):
-                ast.parse(src)
-
-    def test_pep_765_no_warnings(self):
-        srcs = [
-            textwrap.dedent("""
-                 try:
-                     pass
-                 finally:
-                     def f():
-                         return 42
-                 """),
-            textwrap.dedent("""
-                 try:
-                     pass
-                 finally:
-                     for x in y:
-                         break
-                 """),
-            textwrap.dedent("""
-                 try:
-                     pass
-                 finally:
-                     for x in y:
-                         continue
-                 """),
-        ]
-        for src in srcs:
-            ast.parse(src)
-
     def test_tstring(self):
         # Test AST structure for simple t-string
         tree = ast.parse('t"Hello"')
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index bc8ef93cb8f9de..846d38ae561fc5 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1745,6 +1745,66 @@ def test_compile_warning_in_finally(self):
             self.assertEqual(wm.category, SyntaxWarning)
             self.assertIn("\"is\" with 'int' literal", str(wm.message))
 
+    @support.subTests('src', [
+        textwrap.dedent("""
+            def f():
+                try:
+                    pass
+                finally:
+                    return 42
+            """),
+        textwrap.dedent("""
+            for x in y:
+                try:
+                    pass
+                finally:
+                    break
+            """),
+        textwrap.dedent("""
+            for x in y:
+                try:
+                    pass
+                finally:
+                    continue
+            """),
+    ])
+    def test_pep_765_warnings(self, src):
+        with self.assertWarnsRegex(SyntaxWarning, 'finally'):
+            compile(src, '<string>', 'exec')
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            tree = ast.parse(src)
+        with self.assertWarnsRegex(SyntaxWarning, 'finally'):
+            compile(tree, '<string>', 'exec')
+
+    @support.subTests('src', [
+        textwrap.dedent("""
+            try:
+                pass
+            finally:
+                def f():
+                    return 42
+            """),
+        textwrap.dedent("""
+            try:
+                pass
+            finally:
+                for x in y:
+                    break
+            """),
+        textwrap.dedent("""
+            try:
+                pass
+            finally:
+                for x in y:
+                    continue
+            """),
+    ])
+    def test_pep_765_no_warnings(self, src):
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            compile(src, '<string>', 'exec')
+
 
 class TestBooleanExpression(unittest.TestCase):
     class Value:
diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py
index 1a3146da8eadc8..fd4530ebc004aa 100644
--- a/Lib/test/test_pyrepl/test_interact.py
+++ b/Lib/test/test_pyrepl/test_interact.py
@@ -1,5 +1,6 @@
 import contextlib
 import io
+import warnings
 import unittest
 from unittest.mock import patch
 from textwrap import dedent
@@ -273,3 +274,28 @@ def test_incomplete_statement(self):
         code = "if foo:"
         console = InteractiveColoredConsole(namespace, filename="<stdin>")
         self.assertTrue(_more_lines(console, code))
+
+
+class TestWarnings(unittest.TestCase):
+    def test_pep_765_warning(self):
+        """
+        Test that a SyntaxWarning emitted from the
+        AST optimizer is only shown once in the REPL.
+        """
+        # gh-131927
+        console = InteractiveColoredConsole()
+        code = dedent("""\
+        def f():
+            try:
+                return 1
+            finally:
+                return 2
+        """)
+
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            console.runsource(code)
+
+        count = sum("'return' in a 'finally' block" in str(w.message)
+                    for w in caught)
+        self.assertEqual(count, 1)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-10-03-37.gh-issue-139640.gY5oTb2.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-10-03-37.gh-issue-139640.gY5oTb2.rst
new file mode 100644
index 00000000000000..b147b430ccccf5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-10-03-37.gh-issue-139640.gY5oTb2.rst
@@ -0,0 +1,3 @@
+:func:`ast.parse` no longer emits syntax warnings for
+``return``/``break``/``continue`` in ``finally`` (see :pep:`765`) -- they are
+only emitted during compilation.
diff --git a/Python/ast_preprocess.c b/Python/ast_preprocess.c
index 44d3075098be75..fe6fd9479d1531 100644
--- a/Python/ast_preprocess.c
+++ b/Python/ast_preprocess.c
@@ -19,6 +19,7 @@ typedef struct {
     int optimize;
     int ff_features;
     int syntax_check_only;
+    int enable_warnings;
 
     _Py_c_array_t cf_finally;       /* context for PEP 765 check */
     int cf_finally_used;
@@ -78,7 +79,7 @@ control_flow_in_finally_warning(const char *kw, stmt_ty n, _PyASTPreprocessState
 static int
 before_return(_PyASTPreprocessState *state, stmt_ty node_)
 {
-    if (state->cf_finally_used > 0) {
+    if (state->enable_warnings && state->cf_finally_used > 0) {
         ControlFlowInFinallyContext *ctx = get_cf_finally_top(state);
         if (ctx->in_finally && ! ctx->in_funcdef) {
             if (!control_flow_in_finally_warning("return", node_, state)) {
@@ -92,7 +93,7 @@ before_return(_PyASTPreprocessState *state, stmt_ty node_)
 static int
 before_loop_exit(_PyASTPreprocessState *state, stmt_ty node_, const char *kw)
 {
-    if (state->cf_finally_used > 0) {
+    if (state->enable_warnings && state->cf_finally_used > 0) {
         ControlFlowInFinallyContext *ctx = get_cf_finally_top(state);
         if (ctx->in_finally && ! ctx->in_loop) {
             if (!control_flow_in_finally_warning(kw, node_, state)) {
@@ -968,7 +969,7 @@ astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTPreprocessState *st
 
 int
 _PyAST_Preprocess(mod_ty mod, PyArena *arena, PyObject *filename, int optimize,
-                  int ff_features, int syntax_check_only)
+                  int ff_features, int syntax_check_only, int enable_warnings)
 {
     _PyASTPreprocessState state;
     memset(&state, 0, sizeof(_PyASTPreprocessState));
@@ -976,6 +977,7 @@ _PyAST_Preprocess(mod_ty mod, PyArena *arena, PyObject *filename, int optimize,
     state.optimize = optimize;
     state.ff_features = ff_features;
     state.syntax_check_only = syntax_check_only;
+    state.enable_warnings = enable_warnings;
     if (_Py_CArray_Init(&state.cf_finally, sizeof(ControlFlowInFinallyContext), 20) < 0) {
         return -1;
     }
diff --git a/Python/compile.c b/Python/compile.c
index 8070d3f03760ef..e2f1c7e8eb5bce 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -136,7 +136,7 @@ compiler_setup(compiler *c, mod_ty mod, PyObject *filename,
     c->c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
     c->c_save_nested_seqs = false;
 
-    if (!_PyAST_Preprocess(mod, arena, filename, c->c_optimize, merged, 0)) {
+    if (!_PyAST_Preprocess(mod, arena, filename, c->c_optimize, merged, 0, 1)) {
         return ERROR;
     }
     c->c_st = _PySymtable_Build(mod, filename, &c->c_future);
@@ -1502,7 +1502,7 @@ _PyCompile_AstPreprocess(mod_ty mod, PyObject *filename, PyCompilerFlags *cf,
     if (optimize == -1) {
         optimize = _Py_GetConfig()->optimization_level;
     }
-    if (!_PyAST_Preprocess(mod, arena, filename, optimize, flags, no_const_folding)) {
+    if (!_PyAST_Preprocess(mod, arena, filename, optimize, flags, no_const_folding, 0)) {
         return -1;
     }
     return 0;

From 09b1f10ef7b1183d40fe08e56d42dc6152d31f9a Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 30 Oct 2025 13:11:56 +0200
Subject: [PATCH 07/13] gh-140481: Improve error message when trying to iterate
 a Tk widget, image or font (GH-140501)

---
 Lib/test/test_tkinter/test_font.py            | 11 ++++
 Lib/test/test_tkinter/test_images.py          | 52 +++++++++++--------
 Lib/test/test_tkinter/test_misc.py            | 12 +++++
 Lib/tkinter/__init__.py                       |  3 ++
 Lib/tkinter/font.py                           |  2 +
 ...-10-23-13-42-15.gh-issue-140481.XKxWpq.rst |  1 +
 6 files changed, 59 insertions(+), 22 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-23-13-42-15.gh-issue-140481.XKxWpq.rst

diff --git a/Lib/test/test_tkinter/test_font.py b/Lib/test/test_tkinter/test_font.py
index 3616da54cf7075..fc50f9fdbb588c 100644
--- a/Lib/test/test_tkinter/test_font.py
+++ b/Lib/test/test_tkinter/test_font.py
@@ -1,3 +1,4 @@
+import collections.abc
 import unittest
 import tkinter
 from tkinter import font
@@ -118,6 +119,16 @@ def test_repr(self):
             repr(self.font), f'<tkinter.font.Font object {fontname!r}>'
         )
 
+    def test_iterable_protocol(self):
+        self.assertNotIsSubclass(font.Font, collections.abc.Iterable)
+        self.assertNotIsSubclass(font.Font, collections.abc.Container)
+        self.assertNotIsInstance(self.font, collections.abc.Iterable)
+        self.assertNotIsInstance(self.font, collections.abc.Container)
+        with self.assertRaisesRegex(TypeError, 'is not iterable'):
+            iter(self.font)
+        with self.assertRaisesRegex(TypeError, 'is not a container or iterable'):
+            self.font in self.font
+
 
 class DefaultRootTest(AbstractDefaultRootTest, unittest.TestCase):
 
diff --git a/Lib/test/test_tkinter/test_images.py b/Lib/test/test_tkinter/test_images.py
index 38371fe00d6eb5..358a18beee2571 100644
--- a/Lib/test/test_tkinter/test_images.py
+++ b/Lib/test/test_tkinter/test_images.py
@@ -1,3 +1,4 @@
+import collections.abc
 import unittest
 import tkinter
 from test import support
@@ -61,7 +62,33 @@ def test_image_create_photo(self):
         self.assertRaises(RuntimeError, tkinter.PhotoImage)
 
 
-class BitmapImageTest(AbstractTkTest, unittest.TestCase):
+class BaseImageTest:
+    def create(self):
+        return self.image_class('::img::test', master=self.root,
+                                file=self.testfile)
+
+    def test_bug_100814(self):
+        # gh-100814: Passing a callable option value causes AttributeError.
+        with self.assertRaises(tkinter.TclError):
+            self.image_class('::img::test', master=self.root, spam=print)
+        image = self.image_class('::img::test', master=self.root)
+        with self.assertRaises(tkinter.TclError):
+            image.configure(spam=print)
+
+    def test_iterable_protocol(self):
+        image = self.create()
+        self.assertNotIsSubclass(self.image_class, collections.abc.Iterable)
+        self.assertNotIsSubclass(self.image_class, collections.abc.Container)
+        self.assertNotIsInstance(image, collections.abc.Iterable)
+        self.assertNotIsInstance(image, collections.abc.Container)
+        with self.assertRaisesRegex(TypeError, 'is not iterable'):
+            iter(image)
+        with self.assertRaisesRegex(TypeError, 'is not a container or iterable'):
+            image in image
+
+
+class BitmapImageTest(BaseImageTest, AbstractTkTest, unittest.TestCase):
+    image_class = tkinter.BitmapImage
 
     @classmethod
     def setUpClass(cls):
@@ -144,26 +171,15 @@ def test_configure_foreground(self):
         self.assertEqual(image['foreground'],
                          '-foreground {} {} #000000 yellow')
 
-    def test_bug_100814(self):
-        # gh-100814: Passing a callable option value causes AttributeError.
-        with self.assertRaises(tkinter.TclError):
-            tkinter.BitmapImage('::img::test', master=self.root, spam=print)
-        image = tkinter.BitmapImage('::img::test', master=self.root)
-        with self.assertRaises(tkinter.TclError):
-            image.configure(spam=print)
-
 
-class PhotoImageTest(AbstractTkTest, unittest.TestCase):
+class PhotoImageTest(BaseImageTest, AbstractTkTest, unittest.TestCase):
+    image_class = tkinter.PhotoImage
 
     @classmethod
     def setUpClass(cls):
         AbstractTkTest.setUpClass.__func__(cls)
         cls.testfile = support.findfile('python.gif', subdir='tkinterdata')
 
-    def create(self):
-        return tkinter.PhotoImage('::img::test', master=self.root,
-                                  file=self.testfile)
-
     def colorlist(self, *args):
         if tkinter.TkVersion >= 8.6 and self.wantobjects:
             return args
@@ -282,14 +298,6 @@ def test_configure_palette(self):
         image.configure(palette='3/4/2')
         self.assertEqual(image['palette'], '3/4/2')
 
-    def test_bug_100814(self):
-        # gh-100814: Passing a callable option value causes AttributeError.
-        with self.assertRaises(tkinter.TclError):
-            tkinter.PhotoImage('::img::test', master=self.root, spam=print)
-        image = tkinter.PhotoImage('::img::test', master=self.root)
-        with self.assertRaises(tkinter.TclError):
-            image.configure(spam=print)
-
     def test_blank(self):
         image = self.create()
         image.blank()
diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py
index 0c76e07066f8a8..32e2329506e7ff 100644
--- a/Lib/test/test_tkinter/test_misc.py
+++ b/Lib/test/test_tkinter/test_misc.py
@@ -1,3 +1,4 @@
+import collections.abc
 import functools
 import unittest
 import tkinter
@@ -508,6 +509,17 @@ def test_embedded_null(self):
         widget.selection_range(0, 'end')
         self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
 
+    def test_iterable_protocol(self):
+        widget = tkinter.Entry(self.root)
+        self.assertNotIsSubclass(tkinter.Entry, collections.abc.Iterable)
+        self.assertNotIsSubclass(tkinter.Entry, collections.abc.Container)
+        self.assertNotIsInstance(widget, collections.abc.Iterable)
+        self.assertNotIsInstance(widget, collections.abc.Container)
+        with self.assertRaisesRegex(TypeError, 'is not iterable'):
+            iter(widget)
+        with self.assertRaisesRegex(TypeError, 'is not a container or iterable'):
+            widget in widget
+
 
 class WmTest(AbstractTkTest, unittest.TestCase):
 
diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py
index 9526d8b949fa3b..c54530740395f7 100644
--- a/Lib/tkinter/__init__.py
+++ b/Lib/tkinter/__init__.py
@@ -1848,6 +1848,7 @@ def cget(self, key):
         return self.tk.call(self._w, 'cget', '-' + key)
 
     __getitem__ = cget
+    __iter__ = None  # prevent using __getitem__ for iteration
 
     def __setitem__(self, key, value):
         self.configure({key: value})
@@ -4280,6 +4281,8 @@ def __setitem__(self, key, value):
     def __getitem__(self, key):
         return self.tk.call(self.name, 'configure', '-'+key)
 
+    __iter__ = None  # prevent using __getitem__ for iteration
+
     def configure(self, **kw):
         """Configure the image."""
         res = ()
diff --git a/Lib/tkinter/font.py b/Lib/tkinter/font.py
index 7aed523cce3784..896e910d69f6f3 100644
--- a/Lib/tkinter/font.py
+++ b/Lib/tkinter/font.py
@@ -114,6 +114,8 @@ def __getitem__(self, key):
     def __setitem__(self, key, value):
         self.configure(**{key: value})
 
+    __iter__ = None  # prevent using __getitem__ for iteration
+
     def __del__(self):
         try:
             if self.delete_font:
diff --git a/Misc/NEWS.d/next/Library/2025-10-23-13-42-15.gh-issue-140481.XKxWpq.rst b/Misc/NEWS.d/next/Library/2025-10-23-13-42-15.gh-issue-140481.XKxWpq.rst
new file mode 100644
index 00000000000000..1f511c3b9d0583
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-23-13-42-15.gh-issue-140481.XKxWpq.rst
@@ -0,0 +1 @@
+Improve error message when trying to iterate a Tk widget, image or font.

From 134adb32e86c5c4f1335c5884483adf9c56120b0 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:12:45 +0000
Subject: [PATCH 08/13] gh-139188: Remove `Tools/tz/zdump.py` script
 (GH-139189)

---
 ...-09-20-20-31-54.gh-issue-139188.zfcxkW.rst |  1 +
 Tools/README                                  |  2 -
 Tools/tz/zdump.py                             | 81 -------------------
 3 files changed, 1 insertion(+), 83 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-09-20-20-31-54.gh-issue-139188.zfcxkW.rst
 delete mode 100644 Tools/tz/zdump.py

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-09-20-20-31-54.gh-issue-139188.zfcxkW.rst b/Misc/NEWS.d/next/Tools-Demos/2025-09-20-20-31-54.gh-issue-139188.zfcxkW.rst
new file mode 100644
index 00000000000000..9f52d0163ab038
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-09-20-20-31-54.gh-issue-139188.zfcxkW.rst
@@ -0,0 +1 @@
+Remove ``Tools/tz/zdump.py`` script.
diff --git a/Tools/README b/Tools/README
index c8a34d82206672..22d76dfdbcf4a4 100644
--- a/Tools/README
+++ b/Tools/README
@@ -51,8 +51,6 @@ ssl             Scripts to generate ssl_data.h from OpenSSL sources, and run
 
 tsan            Utilities for building CPython with thread-sanitizer.
 
-tz              A script to dump timezone from /usr/share/zoneinfo.
-
 unicode         Tools for generating unicodedata and codecs from unicode.org
                 and other mapping files (by Fredrik Lundh, Marc-Andre Lemburg
                 and Martin von Loewis).
diff --git a/Tools/tz/zdump.py b/Tools/tz/zdump.py
deleted file mode 100644
index 39de0a416d0251..00000000000000
--- a/Tools/tz/zdump.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import sys
-import os
-import struct
-from array import array
-from collections import namedtuple
-from datetime import datetime
-
-ttinfo = namedtuple('ttinfo', ['tt_gmtoff', 'tt_isdst', 'tt_abbrind'])
-
-class TZInfo:
-    def __init__(self, transitions, type_indices, ttis, abbrs):
-        self.transitions = transitions
-        self.type_indices = type_indices
-        self.ttis = ttis
-        self.abbrs = abbrs
-
-    @classmethod
-    def fromfile(cls, fileobj):
-        if fileobj.read(4).decode() != "TZif":
-            raise ValueError("not a zoneinfo file")
-        fileobj.seek(20)
-        header = fileobj.read(24)
-        tzh = (tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt,
-               tzh_timecnt, tzh_typecnt, tzh_charcnt) = struct.unpack(">6l", header)
-        transitions = array('i')
-        transitions.fromfile(fileobj, tzh_timecnt)
-        if sys.byteorder != 'big':
-            transitions.byteswap()
-
-        type_indices = array('B')
-        type_indices.fromfile(fileobj, tzh_timecnt)
-
-        ttis = []
-        for i in range(tzh_typecnt):
-            ttis.append(ttinfo._make(struct.unpack(">lbb", fileobj.read(6))))
-
-        abbrs = fileobj.read(tzh_charcnt)
-
-        self = cls(transitions, type_indices, ttis, abbrs)
-        self.tzh = tzh
-
-        return self
-
-    def dump(self, stream, start=None, end=None):
-        for j, (trans, i) in enumerate(zip(self.transitions, self.type_indices)):
-            utc = datetime.utcfromtimestamp(trans)
-            tti = self.ttis[i]
-            lmt = datetime.utcfromtimestamp(trans + tti.tt_gmtoff)
-            abbrind = tti.tt_abbrind
-            abbr = self.abbrs[abbrind:self.abbrs.find(0, abbrind)].decode()
-            if j > 0:
-                prev_tti = self.ttis[self.type_indices[j - 1]]
-                shift = " %+g" % ((tti.tt_gmtoff - prev_tti.tt_gmtoff) / 3600)
-            else:
-                shift = ''
-            print("%s UTC = %s %-5s isdst=%d" % (utc, lmt, abbr, tti[1]) + shift, file=stream)
-
-    @classmethod
-    def zonelist(cls, zonedir='/usr/share/zoneinfo'):
-        zones = []
-        for root, _, files in os.walk(zonedir):
-            for f in files:
-                p = os.path.join(root, f)
-                with open(p, 'rb') as o:
-                    magic =  o.read(4)
-                if magic == b'TZif':
-                    zones.append(p[len(zonedir) + 1:])
-        return zones
-
-if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        zones = TZInfo.zonelist()
-        for z in zones:
-            print(z)
-        sys.exit()
-    filepath = sys.argv[1]
-    if not filepath.startswith('/'):
-        filepath = os.path.join('/usr/share/zoneinfo', filepath)
-    with open(filepath, 'rb') as fileobj:
-        tzi = TZInfo.fromfile(fileobj)
-    tzi.dump(sys.stdout)

From 75a1cbdd38b142b359849eae16a2ecc12f6b3881 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 30 Oct 2025 12:14:17 +0100
Subject: [PATCH 09/13] gh-140748: socket_helper.transient_internet: Unwrap
 URLError.__cause__ (GH-140749)

---
 Lib/test/support/socket_helper.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Lib/test/support/socket_helper.py b/Lib/test/support/socket_helper.py
index 87941ee1791b4e..a41e487f3e4bc5 100644
--- a/Lib/test/support/socket_helper.py
+++ b/Lib/test/support/socket_helper.py
@@ -259,6 +259,10 @@ def filter_error(err):
             #        raise OSError('socket error', msg) from msg
             elif len(a) >= 2 and isinstance(a[1], OSError):
                 err = a[1]
+            # The error can also be wrapped as __cause__:
+            #    raise URLError(f"ftp error: {exp}") from exp
+            elif isinstance(err, urllib.error.URLError) and err.__cause__:
+                err = err.__cause__
             else:
                 break
         filter_error(err)

From efc37ba49eef07dad83698cf8e68820c955aacf9 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Thu, 30 Oct 2025 14:36:15 +0100
Subject: [PATCH 10/13] gh-139353: Add Objects/unicode_writer.c file (#139911)

Move the public PyUnicodeWriter API and the private _PyUnicodeWriter
API to a new Objects/unicode_writer.c file.

Rename a few helper functions to share them between unicodeobject.c
and unicode_writer.c, such as resize_compact() or unicode_result().
---
 Include/internal/pycore_unicodeobject.h |  40 ++
 Makefile.pre.in                         |   1 +
 Objects/unicode_writer.c                | 639 +++++++++++++++++++++++
 Objects/unicodeobject.c                 | 667 ++----------------------
 PCbuild/_freeze_module.vcxproj          |   1 +
 PCbuild/_freeze_module.vcxproj.filters  |   3 +
 PCbuild/pythoncore.vcxproj              |   1 +
 PCbuild/pythoncore.vcxproj.filters      |   3 +
 8 files changed, 717 insertions(+), 638 deletions(-)
 create mode 100644 Objects/unicode_writer.c

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index f384fad8713adc..e7ca65a56b6ec3 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -17,6 +17,46 @@ extern "C" {
 
 
 extern int _PyUnicode_IsModifiable(PyObject *unicode);
+extern void _PyUnicodeWriter_InitWithBuffer(
+    _PyUnicodeWriter *writer,
+    PyObject *buffer);
+extern PyObject* _PyUnicode_Result(PyObject *unicode);
+extern int _PyUnicode_DecodeUTF8Writer(
+    _PyUnicodeWriter *writer,
+    const char *s,
+    Py_ssize_t size,
+    _Py_error_handler error_handler,
+    const char *errors,
+    Py_ssize_t *consumed);
+extern PyObject* _PyUnicode_ResizeCompact(
+    PyObject *unicode,
+    Py_ssize_t length);
+extern PyObject* _PyUnicode_GetEmpty(void);
+
+
+/* Generic helper macro to convert characters of different types.
+   from_type and to_type have to be valid type names, begin and end
+   are pointers to the source characters which should be of type
+   "from_type *".  to is a pointer of type "to_type *" and points to the
+   buffer where the result characters are written to. */
+#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
+    do {                                                \
+        to_type *_to = (to_type *)(to);                 \
+        const from_type *_iter = (const from_type *)(begin);\
+        const from_type *_end = (const from_type *)(end);\
+        Py_ssize_t n = (_end) - (_iter);                \
+        const from_type *_unrolled_end =                \
+            _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
+        while (_iter < (_unrolled_end)) {               \
+            _to[0] = (to_type) _iter[0];                \
+            _to[1] = (to_type) _iter[1];                \
+            _to[2] = (to_type) _iter[2];                \
+            _to[3] = (to_type) _iter[3];                \
+            _iter += 4; _to += 4;                       \
+        }                                               \
+        while (_iter < (_end))                          \
+            *_to++ = (to_type) *_iter++;                \
+    } while (0)
 
 
 static inline void
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 0a1b8d028addad..656d9dacd962e3 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -559,6 +559,7 @@ OBJECT_OBJS=	\
 		Objects/typevarobject.o \
 		Objects/unicode_format.o \
 		Objects/unicode_formatter.o \
+		Objects/unicode_writer.o \
 		Objects/unicodectype.o \
 		Objects/unicodeobject.o \
 		Objects/unionobject.o \
diff --git a/Objects/unicode_writer.c b/Objects/unicode_writer.c
new file mode 100644
index 00000000000000..2b944bf1ea8cde
--- /dev/null
+++ b/Objects/unicode_writer.c
@@ -0,0 +1,639 @@
+/*
+
+Unicode implementation based on original code by Fredrik Lundh,
+modified by Marc-Andre Lemburg <mal@lemburg.com>.
+
+Major speed upgrades to the method implementations at the Reykjavik
+NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
+
+Copyright (c) Corporation for National Research Initiatives.
+
+--------------------------------------------------------------------
+The original string type implementation is:
+
+  Copyright (c) 1999 by Secret Labs AB
+  Copyright (c) 1999 by Fredrik Lundh
+
+By obtaining, using, and/or copying this software and/or its
+associated documentation, you agree that you have read, understood,
+and will comply with the following terms and conditions:
+
+Permission to use, copy, modify, and distribute this software and its
+associated documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies, and that both that copyright notice and this permission notice
+appear in supporting documentation, and that the name of Secret Labs
+AB or the author not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+--------------------------------------------------------------------
+
+*/
+
+#include "Python.h"
+#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
+#include "pycore_long.h"          // _PyLong_FormatWriter()
+#include "pycore_unicodeobject.h" // _PyUnicode_Result()
+
+
+#ifdef MS_WINDOWS
+   /* On Windows, overallocating by 50% (newlen / 2) works best */
+#  define OVERALLOCATE_FACTOR 2
+#else
+   /* On Linux, overallocating by 25% (newlen / 4) works best */
+#  define OVERALLOCATE_FACTOR 4
+#endif
+
+
+/* Compilation of templated routines */
+
+#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
+
+#include "stringlib/ucs1lib.h"
+#include "stringlib/find_max_char.h"
+#include "stringlib/undef.h"
+
+
+/* Copy len bytes of an ASCII or latin1 char* string into unicode at index.
+   Bytes are read as unsigned char so that 0x80..0xFF are not sign-extended.
+   WARNING: The function doesn't copy the terminating null character and
+   doesn't check the maximum character (may write a latin1 character in an
+   ASCII string). */
+static void
+unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
+                   const char *str, Py_ssize_t len)
+{
+    int kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    const char *end = str + len;
+
+    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+#ifdef Py_DEBUG
+        if (PyUnicode_IS_ASCII(unicode)) {
+            Py_UCS4 maxchar = ucs1lib_find_max_char(
+                (const Py_UCS1*)str,
+                (const Py_UCS1*)str + len);
+            assert(maxchar < 128);
+        }
+#endif
+        memcpy((char *) data + index, str, len);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND: {
+        Py_UCS2 *start = (Py_UCS2 *)data + index;
+        Py_UCS2 *ucs2 = start;
+        /* Plain char may be signed: a byte >= 0x80 would sign-extend. */
+        for (; str < end; ++ucs2, ++str)
+            *ucs2 = (Py_UCS2)(unsigned char)*str;
+
+        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
+        break;
+    }
+    case PyUnicode_4BYTE_KIND: {
+        Py_UCS4 *start = (Py_UCS4 *)data + index;
+        Py_UCS4 *ucs4 = start;
+        /* Same as the 2-byte case: avoid sign extension of bytes >= 0x80. */
+        for (; str < end; ++ucs4, ++str)
+            *ucs4 = (Py_UCS4)(unsigned char)*str;
+
+        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
+        break;
+    }
+    default:
+        Py_UNREACHABLE();
+    }
+}
+
+/* Refresh the writer's cached maxchar/data/kind/size from writer->buffer. */
+static inline void
+_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
+{
+    writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
+    writer->data = PyUnicode_DATA(writer->buffer);
+
+    if (!writer->readonly) {
+        writer->kind = PyUnicode_KIND(writer->buffer);
+        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+    }
+    else {
+        /* use a value smaller than PyUnicode_1BYTE_KIND so
+           _PyUnicodeWriter_PrepareKind() will copy the buffer. */
+        writer->kind = 0;
+        assert(writer->kind <= PyUnicode_1BYTE_KIND);
+
+        /* Copy-on-write mode: set buffer size to 0 so
+         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
+         * next write. */
+        writer->size = 0;
+    }
+}
+
+/* Reset writer to its initial state: all fields zero, min_char = 127. */
+void
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
+{
+    memset(writer, 0, sizeof(*writer));  // buffer = NULL, pos = 0, kind = 0
+
+    /* ASCII is the bare minimum */
+    writer->min_char = 127;
+
+    /* use a kind value smaller than PyUnicode_1BYTE_KIND so
+       _PyUnicodeWriter_PrepareKind() will copy the buffer. */
+    assert(writer->kind == 0);
+    assert(writer->kind < PyUnicode_1BYTE_KIND);
+}
+
+/* Create a writer prepared for length characters; return NULL on error. */
+PyUnicodeWriter*
+PyUnicodeWriter_Create(Py_ssize_t length)
+{
+    if (length < 0) {  // note: 0 is accepted, despite the message below
+        PyErr_SetString(PyExc_ValueError,
+                        "length must be positive");
+        return NULL;
+    }
+
+    const size_t size = sizeof(_PyUnicodeWriter);
+    PyUnicodeWriter *pub_writer;
+    pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
+    if (pub_writer == NULL) {  // nothing to reuse: allocate a new writer
+        pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
+        if (pub_writer == NULL) {
+            return (PyUnicodeWriter *)PyErr_NoMemory();
+        }
+    }
+    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
+
+    _PyUnicodeWriter_Init(writer);
+    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
+        PyUnicodeWriter_Discard(pub_writer);
+        return NULL;
+    }
+    writer->overallocate = 1;  // set after the Prepare call, which saw 0
+
+    return pub_writer;
+}
+
+/* Destroy a writer without producing a string; a NULL writer is a no-op. */
+void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
+{
+    if (writer == NULL) {
+        return;
+    }
+    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);  // drop the buffer
+    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
+}
+
+
+// Initialize writer with an initial buffer; no new reference is taken here.
+void
+_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
+{
+    memset(writer, 0, sizeof(*writer));  // readonly = 0, pos = 0
+    writer->buffer = buffer;
+    _PyUnicodeWriter_Update(writer);  // cache kind/size/maxchar of buffer
+    writer->min_length = writer->size;  // later resizes keep at least this
+}
+
+/* Ensure room for length more chars up to maxchar.  Return 0, or -1. */
+int
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+                                 Py_ssize_t length, Py_UCS4 maxchar)
+{
+    Py_ssize_t newlen;
+    PyObject *newbuffer;
+
+    assert(length >= 0);
+    assert(maxchar <= _Py_MAX_UNICODE);
+
+    /* ensure that the _PyUnicodeWriter_Prepare macro was used */
+    assert((maxchar > writer->maxchar && length >= 0)
+           || length > 0);
+
+    if (length > PY_SSIZE_T_MAX - writer->pos) {  // pos + length overflows
+        PyErr_NoMemory();
+        return -1;
+    }
+    newlen = writer->pos + length;
+
+    maxchar = Py_MAX(maxchar, writer->min_char);
+
+    if (writer->buffer == NULL) {  // case 1: no buffer yet, allocate one
+        assert(!writer->readonly);
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() */
+            newlen += newlen / OVERALLOCATE_FACTOR;
+        }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
+
+        writer->buffer = PyUnicode_New(newlen, maxchar);
+        if (writer->buffer == NULL)
+            return -1;
+    }
+    else if (newlen > writer->size) {  // case 2: buffer is too short
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() */
+            newlen += newlen / OVERALLOCATE_FACTOR;
+        }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
+
+        if (maxchar > writer->maxchar || writer->readonly) {
+            /* resize + widen */
+            maxchar = Py_MAX(maxchar, writer->maxchar);
+            newbuffer = PyUnicode_New(newlen, maxchar);
+            if (newbuffer == NULL)
+                return -1;
+            _PyUnicode_FastCopyCharacters(newbuffer, 0,
+                                          writer->buffer, 0, writer->pos);
+            Py_DECREF(writer->buffer);
+            writer->readonly = 0;  // the writer now owns a private copy
+        }
+        else {
+            newbuffer = _PyUnicode_ResizeCompact(writer->buffer, newlen);
+            if (newbuffer == NULL)
+                return -1;
+        }
+        writer->buffer = newbuffer;
+    }
+    else if (maxchar > writer->maxchar) {  // case 3: long enough, too narrow
+        assert(!writer->readonly);
+        newbuffer = PyUnicode_New(writer->size, maxchar);
+        if (newbuffer == NULL)
+            return -1;
+        _PyUnicode_FastCopyCharacters(newbuffer, 0,
+                                      writer->buffer, 0, writer->pos);
+        Py_SETREF(writer->buffer, newbuffer);
+    }
+    _PyUnicodeWriter_Update(writer);  // re-sync cached fields with the buffer
+    return 0;
+
+#undef OVERALLOCATE_FACTOR
+}
+
+int
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+                                     int kind)
+{
+    Py_UCS4 maxchar;  // largest character value storable with 'kind'
+    /* Widen the buffer so that it can store characters of the given kind. */
+    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
+    assert(writer->kind < kind);
+
+    switch (kind)
+    {
+    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
+    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
+    case PyUnicode_4BYTE_KIND: maxchar = _Py_MAX_UNICODE; break;
+    default:
+        Py_UNREACHABLE();
+    }
+
+    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
+}
+
+/* Out-of-line wrapper around _PyUnicodeWriter_WriteCharInline(). */
+int
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    return _PyUnicodeWriter_WriteCharInline(writer, ch);
+}
+
+/* Public API: append ch; ValueError and -1 if ch is above _Py_MAX_UNICODE. */
+int
+PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    if (ch > _Py_MAX_UNICODE) {
+        PyErr_SetString(PyExc_ValueError,
+                        "character must be in range(0x110000)");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
+}
+
+/* Append str to the writer.  Return 0 on success, -1 on error. */
+int
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
+{
+    assert(PyUnicode_Check(str));
+
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+
+    len = PyUnicode_GET_LENGTH(str);
+    if (len == 0)
+        return 0;
+    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
+    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
+        if (writer->buffer == NULL && !writer->overallocate) {
+            assert(_PyUnicode_CheckConsistency(str, 1));
+            writer->readonly = 1;  // use str itself as the buffer, no copy
+            writer->buffer = Py_NewRef(str);
+            _PyUnicodeWriter_Update(writer);
+            writer->pos += len;
+            return 0;
+        }
+        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
+            return -1;
+    }
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, 0, len);
+    writer->pos += len;
+    return 0;
+}
+
+/* Append str(obj); exact str and exact int objects skip PyObject_Str(). */
+int
+PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    PyTypeObject *type = Py_TYPE(obj);
+    if (type == &PyUnicode_Type) {  // exact str only, not subclasses
+        return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
+    }
+
+    if (type == &PyLong_Type) {  // exact int only, not subclasses
+        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
+    }
+
+    PyObject *str = PyObject_Str(obj);
+    if (str == NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
+    Py_DECREF(str);
+    return res;
+}
+
+/* Append repr(obj); exact int objects skip PyObject_Repr(). */
+int
+PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    if (Py_TYPE(obj) == &PyLong_Type) {  // exact int only, not subclasses
+        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
+    }
+
+    PyObject *repr = PyObject_Repr(obj);
+    if (repr == NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
+    Py_DECREF(repr);
+    return res;
+}
+
+/* Append str[start:end].  The bounds are only checked by assertions. */
+int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+                                Py_ssize_t start, Py_ssize_t end)
+{
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(str));
+    assert(start <= end);
+
+    if (start == 0 && end == PyUnicode_GET_LENGTH(str))  // the whole string
+        return _PyUnicodeWriter_WriteStr(writer, str);
+
+    Py_ssize_t len = end - start;
+    if (len == 0) {
+        return 0;
+    }
+
+    Py_UCS4 maxchar;
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
+        maxchar = _PyUnicode_FindMaxChar(str, start, end);  // scan the slice
+    }
+    else {
+        maxchar = writer->maxchar;  // the slice cannot exceed the buffer's max
+    }
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
+        return -1;
+    }
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, start, len);
+    writer->pos += len;
+    return 0;
+}
+
+/* Public API: validate the arguments, then append str[start:end]. */
+int
+PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
+                               Py_ssize_t start, Py_ssize_t end)
+{
+    if (!PyUnicode_Check(str)) {
+        PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
+        return -1;
+    }
+    if (start < 0 || start > end) {
+        PyErr_Format(PyExc_ValueError, "invalid start argument");
+        return -1;
+    }
+    if (end > PyUnicode_GET_LENGTH(str)) {
+        PyErr_Format(PyExc_ValueError, "invalid end argument");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
+                                           start, end);
+}
+
+/* Append len bytes of ASCII text; len == -1 means ascii is NUL-terminated. */
+int
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+                                  const char *ascii, Py_ssize_t len)
+{
+    if (len == -1)
+        len = strlen(ascii);
+
+    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
+
+    if (writer->buffer == NULL && !writer->overallocate) {
+        PyObject *str;
+
+        str = _PyUnicode_FromASCII(ascii, len);
+        if (str == NULL)
+            return -1;
+
+        writer->readonly = 1;  // the new string itself becomes the buffer
+        writer->buffer = str;
+        _PyUnicodeWriter_Update(writer);
+        writer->pos += len;
+        return 0;
+    }
+
+    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+        return -1;
+
+    switch (writer->kind)
+    {
+    case PyUnicode_1BYTE_KIND:
+    {
+        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
+        Py_UCS1 *data = writer->data;
+
+        memcpy(data + writer->pos, str, len);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS2,
+            ascii, ascii + len,
+            (Py_UCS2 *)writer->data + writer->pos);
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS4,
+            ascii, ascii + len,
+            (Py_UCS4 *)writer->data + writer->pos);
+        break;
+    }
+    default:
+        Py_UNREACHABLE();
+    }
+
+    writer->pos += len;
+    return 0;
+}
+
+/* Public API: append ASCII text; ASCII-ness is only checked by assertion. */
+int
+PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
+                           const char *str,
+                           Py_ssize_t size)
+{
+    assert(writer != NULL);
+    _Py_AssertHoldsTstate();
+
+    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
+    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
+}
+
+/* Decode size bytes of UTF-8 (strict) and append; pos is restored on error. */
+int
+PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
+                          const char *str,
+                          Py_ssize_t size)
+{
+    if (size < 0) {  // negative size: str is NUL-terminated
+        size = strlen(str);
+    }
+
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
+    Py_ssize_t old_pos = _writer->pos;
+    int res = _PyUnicode_DecodeUTF8Writer(_writer, str, size,
+                                          _Py_ERROR_STRICT, NULL, NULL);
+    if (res < 0) {
+        _writer->pos = old_pos;  // discard any partially decoded output
+    }
+    return res;
+}
+
+/* Decode UTF-8 with the given errors handler and append the result. */
+int
+PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
+                                   const char *string,
+                                   Py_ssize_t length,
+                                   const char *errors,
+                                   Py_ssize_t *consumed)
+{
+    if (length < 0) {  // negative length: string is NUL-terminated
+        length = strlen(string);
+    }
+
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
+    Py_ssize_t old_pos = _writer->pos;
+    int res = _PyUnicode_DecodeUTF8Writer(_writer, string, length,
+                                          _Py_ERROR_UNKNOWN, errors,
+                                          consumed);
+    if (res < 0) {
+        _writer->pos = old_pos;  // discard any partially decoded output
+        if (consumed) {
+            *consumed = 0;  // on failure, report that nothing was consumed
+        }
+    }
+    return res;
+}
+
+/* Append len bytes of latin1 text from str.  Return 0, or -1 on error. */
+int
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+                                   const char *str, Py_ssize_t len)
+{
+    Py_UCS4 maxchar;
+
+    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
+        return -1;
+    unicode_write_cstr(writer->buffer, writer->pos, str, len);
+    writer->pos += len;
+    return 0;
+}
+
+/* Return the built string and clear writer->buffer; NULL if resize fails. */
+PyObject *
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
+{
+    PyObject *str;
+
+    if (writer->pos == 0) {  // nothing written: return the empty string
+        Py_CLEAR(writer->buffer);
+        return _PyUnicode_GetEmpty();
+    }
+
+    str = writer->buffer;  // the writer's reference moves to str
+    writer->buffer = NULL;
+
+    if (writer->readonly) {
+        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
+        return str;
+    }
+
+    if (PyUnicode_GET_LENGTH(str) != writer->pos) {  // shrink to pos chars
+        PyObject *str2;
+        str2 = _PyUnicode_ResizeCompact(str, writer->pos);
+        if (str2 == NULL) {
+            Py_DECREF(str);
+            return NULL;
+        }
+        str = str2;
+    }
+
+    assert(_PyUnicode_CheckConsistency(str, 1));
+    return _PyUnicode_Result(str);
+}
+
+/* Public API: return the final string (or NULL) and release the writer. */
+PyObject*
+PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
+{
+    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
+    assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
+    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);  // even if NULL
+    return str;
+}
+
+/* Release the reference to the buffer; the writer struct is not freed. */
+void
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
+{
+    Py_CLEAR(writer->buffer);
+}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8a5638ac1406ab..1c443e88e05029 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -46,7 +46,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include "pycore_codecs.h"        // _PyCodec_Lookup()
 #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST
 #include "pycore_format.h"        // F_LJUST
-#include "pycore_freelist.h"      // _Py_FREELIST_FREE(), _Py_FREELIST_POP()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
 #include "pycore_long.h"          // _PyLong_FormatWriter()
@@ -184,45 +183,9 @@ static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
 }
 
 
-/* Generic helper macro to convert characters of different types.
-   from_type and to_type have to be valid type names, begin and end
-   are pointers to the source characters which should be of type
-   "from_type *".  to is a pointer of type "to_type *" and points to the
-   buffer where the result characters are written to. */
-#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
-    do {                                                \
-        to_type *_to = (to_type *)(to);                 \
-        const from_type *_iter = (const from_type *)(begin);\
-        const from_type *_end = (const from_type *)(end);\
-        Py_ssize_t n = (_end) - (_iter);                \
-        const from_type *_unrolled_end =                \
-            _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
-        while (_iter < (_unrolled_end)) {               \
-            _to[0] = (to_type) _iter[0];                \
-            _to[1] = (to_type) _iter[1];                \
-            _to[2] = (to_type) _iter[2];                \
-            _to[3] = (to_type) _iter[3];                \
-            _iter += 4; _to += 4;                       \
-        }                                               \
-        while (_iter < (_end))                          \
-            *_to++ = (to_type) *_iter++;                \
-    } while (0)
-
 #define LATIN1 _Py_LATIN1_CHR
 
-#ifdef MS_WINDOWS
-   /* On Windows, overallocate by 50% is the best factor */
-#  define OVERALLOCATE_FACTOR 2
-#else
-   /* On Linux, overallocate by 25% is the best factor */
-#  define OVERALLOCATE_FACTOR 4
-#endif
-
 /* Forward declaration */
-static inline int
-_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
-static inline void
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
 static PyObject *
 unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
                     const char *errors);
@@ -230,11 +193,6 @@ static PyObject *
 unicode_decode_utf8(const char *s, Py_ssize_t size,
                     _Py_error_handler error_handler, const char *errors,
                     Py_ssize_t *consumed);
-static int
-unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
-                           const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors,
-                           Py_ssize_t *consumed);
 #ifdef Py_DEBUG
 static inline int unicode_is_finalizing(void);
 static int unicode_is_singleton(PyObject *unicode);
@@ -242,7 +200,8 @@ static int unicode_is_singleton(PyObject *unicode);
 
 
 // Return a reference to the immortal empty string singleton.
-static inline PyObject* unicode_get_empty(void)
+PyObject*
+_PyUnicode_GetEmpty(void)
 {
     _Py_DECLARE_STR(empty, "");
     return &_Py_STR(empty);
@@ -416,7 +375,7 @@ static void clear_global_interned_strings(void)
 
 #define _Py_RETURN_UNICODE_EMPTY()   \
     do {                             \
-        return unicode_get_empty();  \
+        return _PyUnicode_GetEmpty();\
     } while (0)
 
 
@@ -748,14 +707,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 #undef CHECK
 }
 
-static PyObject*
-unicode_result(PyObject *unicode)
+PyObject*
+_PyUnicode_Result(PyObject *unicode)
 {
     assert(_PyUnicode_CHECK(unicode));
 
     Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
     if (length == 0) {
-        PyObject *empty = unicode_get_empty();
+        PyObject *empty = _PyUnicode_GetEmpty();
         if (unicode != empty) {
             Py_DECREF(unicode);
         }
@@ -778,6 +737,7 @@ unicode_result(PyObject *unicode)
     assert(_PyUnicode_CheckConsistency(unicode, 1));
     return unicode;
 }
+#define unicode_result _PyUnicode_Result
 
 static PyObject*
 unicode_result_unchanged(PyObject *unicode)
@@ -985,7 +945,7 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
 
 /* Compilation of templated routines */
 
-#define STRINGLIB_GET_EMPTY() unicode_get_empty()
+#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
 
 #include "stringlib/asciilib.h"
 #include "stringlib/fastsearch.h"
@@ -1097,8 +1057,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
     return copy;
 }
 
-static PyObject*
-resize_compact(PyObject *unicode, Py_ssize_t length)
+PyObject*
+_PyUnicode_ResizeCompact(PyObject *unicode, Py_ssize_t length)
 {
     Py_ssize_t char_size;
     Py_ssize_t struct_size;
@@ -1306,7 +1266,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
 {
     /* Optimization for empty strings */
     if (size == 0) {
-        return unicode_get_empty();
+        return _PyUnicode_GetEmpty();
     }
 
     PyObject *obj;
@@ -1799,7 +1759,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
         return 0;
 
     if (length == 0) {
-        PyObject *empty = unicode_get_empty();
+        PyObject *empty = _PyUnicode_GetEmpty();
         Py_SETREF(*p_unicode, empty);
         return 0;
     }
@@ -1813,7 +1773,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
     }
 
     if (PyUnicode_IS_COMPACT(unicode)) {
-        PyObject *new_unicode = resize_compact(unicode, length);
+        PyObject *new_unicode = _PyUnicode_ResizeCompact(unicode, length);
         if (new_unicode == NULL)
             return -1;
         *p_unicode = new_unicode;
@@ -1839,58 +1799,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
     return unicode_resize(p_unicode, length);
 }
 
-/* Copy an ASCII or latin1 char* string into a Python Unicode string.
-
-   WARNING: The function doesn't copy the terminating null character and
-   doesn't check the maximum character (may write a latin1 character in an
-   ASCII string). */
-static void
-unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
-                   const char *str, Py_ssize_t len)
-{
-    int kind = PyUnicode_KIND(unicode);
-    const void *data = PyUnicode_DATA(unicode);
-    const char *end = str + len;
-
-    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: {
-#ifdef Py_DEBUG
-        if (PyUnicode_IS_ASCII(unicode)) {
-            Py_UCS4 maxchar = ucs1lib_find_max_char(
-                (const Py_UCS1*)str,
-                (const Py_UCS1*)str + len);
-            assert(maxchar < 128);
-        }
-#endif
-        memcpy((char *) data + index, str, len);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        Py_UCS2 *start = (Py_UCS2 *)data + index;
-        Py_UCS2 *ucs2 = start;
-
-        for (; str < end; ++ucs2, ++str)
-            *ucs2 = (Py_UCS2)*str;
-
-        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
-        break;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        Py_UCS4 *start = (Py_UCS4 *)data + index;
-        Py_UCS4 *ucs4 = start;
-
-        for (; str < end; ++ucs4, ++str)
-            *ucs4 = (Py_UCS4)*str;
-
-        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
-        break;
-    }
-    default:
-        Py_UNREACHABLE();
-    }
-}
-
 static PyObject*
 get_latin1_char(Py_UCS1 ch)
 {
@@ -2105,7 +2013,7 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
             "NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize");
         return NULL;
     }
-    return unicode_get_empty();
+    return _PyUnicode_GetEmpty();
 }
 
 PyObject *
@@ -2672,8 +2580,8 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
     }
 
     if (width < 0) {
-        return unicode_decode_utf8_writer(writer, str, length,
-                                          _Py_ERROR_REPLACE, "replace", pconsumed);
+        return _PyUnicode_DecodeUTF8Writer(writer, str, length,
+                                           _Py_ERROR_REPLACE, "replace", pconsumed);
     }
 
     PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
@@ -5424,11 +5332,11 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 
 
 // Used by PyUnicodeWriter_WriteUTF8() implementation
-static int
-unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
-                           const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors,
-                           Py_ssize_t *consumed)
+int
+_PyUnicode_DecodeUTF8Writer(_PyUnicodeWriter *writer,
+                            const char *s, Py_ssize_t size,
+                            _Py_error_handler error_handler, const char *errors,
+                            Py_ssize_t *consumed)
 {
     if (size == 0) {
         if (consumed) {
@@ -10766,7 +10674,7 @@ replace(PyObject *self, PyObject *str1,
         }
         new_size = slen + n * (len2 - len1);
         if (new_size == 0) {
-            u = unicode_get_empty();
+            u = _PyUnicode_GetEmpty();
             goto done;
         }
         if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@@ -11439,7 +11347,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
     }
 
     /* Shortcuts */
-    PyObject *empty = unicode_get_empty();  // Borrowed reference
+    PyObject *empty = _PyUnicode_GetEmpty();  // Borrowed reference
     if (left == empty) {
         return PyUnicode_FromObject(right);
     }
@@ -11491,7 +11399,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
     }
 
     /* Shortcuts */
-    PyObject *empty = unicode_get_empty();  // Borrowed reference
+    PyObject *empty = _PyUnicode_GetEmpty();  // Borrowed reference
     if (left == empty) {
         Py_DECREF(left);
         *p_left = Py_NewRef(right);
@@ -12987,7 +12895,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
     if (kind1 < kind2 || len1 < len2) {
-        PyObject *empty = unicode_get_empty();  // Borrowed reference
+        PyObject *empty = _PyUnicode_GetEmpty();  // Borrowed reference
         return PyTuple_Pack(3, str_obj, empty, empty);
     }
     buf1 = PyUnicode_DATA(str_obj);
@@ -13039,7 +12947,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
     if (kind1 < kind2 || len1 < len2) {
-        PyObject *empty = unicode_get_empty();  // Borrowed reference
+        PyObject *empty = _PyUnicode_GetEmpty();  // Borrowed reference
         return PyTuple_Pack(3, empty, empty, str_obj);
     }
     buf1 = PyUnicode_DATA(str_obj);
@@ -13518,523 +13426,6 @@ unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
 }
 
 
-static inline void
-_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
-{
-    writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
-    writer->data = PyUnicode_DATA(writer->buffer);
-
-    if (!writer->readonly) {
-        writer->kind = PyUnicode_KIND(writer->buffer);
-        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
-    }
-    else {
-        /* use a value smaller than PyUnicode_1BYTE_KIND() so
-           _PyUnicodeWriter_PrepareKind() will copy the buffer. */
-        writer->kind = 0;
-        assert(writer->kind <= PyUnicode_1BYTE_KIND);
-
-        /* Copy-on-write mode: set buffer size to 0 so
-         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
-         * next write. */
-        writer->size = 0;
-    }
-}
-
-
-void
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
-{
-    memset(writer, 0, sizeof(*writer));
-
-    /* ASCII is the bare minimum */
-    writer->min_char = 127;
-
-    /* use a kind value smaller than PyUnicode_1BYTE_KIND so
-       _PyUnicodeWriter_PrepareKind() will copy the buffer. */
-    assert(writer->kind == 0);
-    assert(writer->kind < PyUnicode_1BYTE_KIND);
-}
-
-
-PyUnicodeWriter*
-PyUnicodeWriter_Create(Py_ssize_t length)
-{
-    if (length < 0) {
-        PyErr_SetString(PyExc_ValueError,
-                        "length must be positive");
-        return NULL;
-    }
-
-    const size_t size = sizeof(_PyUnicodeWriter);
-    PyUnicodeWriter *pub_writer;
-    pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
-    if (pub_writer == NULL) {
-        pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
-        if (pub_writer == NULL) {
-            return (PyUnicodeWriter *)PyErr_NoMemory();
-        }
-    }
-    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
-
-    _PyUnicodeWriter_Init(writer);
-    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
-        PyUnicodeWriter_Discard(pub_writer);
-        return NULL;
-    }
-    writer->overallocate = 1;
-
-    return pub_writer;
-}
-
-
-void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
-{
-    if (writer == NULL) {
-        return;
-    }
-    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
-    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
-}
-
-
-// Initialize _PyUnicodeWriter with initial buffer
-static inline void
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
-{
-    memset(writer, 0, sizeof(*writer));
-    writer->buffer = buffer;
-    _PyUnicodeWriter_Update(writer);
-    writer->min_length = writer->size;
-}
-
-
-int
-_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
-                                 Py_ssize_t length, Py_UCS4 maxchar)
-{
-    Py_ssize_t newlen;
-    PyObject *newbuffer;
-
-    assert(length >= 0);
-    assert(maxchar <= MAX_UNICODE);
-
-    /* ensure that the _PyUnicodeWriter_Prepare macro was used */
-    assert((maxchar > writer->maxchar && length >= 0)
-           || length > 0);
-
-    if (length > PY_SSIZE_T_MAX - writer->pos) {
-        PyErr_NoMemory();
-        return -1;
-    }
-    newlen = writer->pos + length;
-
-    maxchar = Py_MAX(maxchar, writer->min_char);
-
-    if (writer->buffer == NULL) {
-        assert(!writer->readonly);
-        if (writer->overallocate
-            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
-            /* overallocate to limit the number of realloc() */
-            newlen += newlen / OVERALLOCATE_FACTOR;
-        }
-        if (newlen < writer->min_length)
-            newlen = writer->min_length;
-
-        writer->buffer = PyUnicode_New(newlen, maxchar);
-        if (writer->buffer == NULL)
-            return -1;
-    }
-    else if (newlen > writer->size) {
-        if (writer->overallocate
-            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
-            /* overallocate to limit the number of realloc() */
-            newlen += newlen / OVERALLOCATE_FACTOR;
-        }
-        if (newlen < writer->min_length)
-            newlen = writer->min_length;
-
-        if (maxchar > writer->maxchar || writer->readonly) {
-            /* resize + widen */
-            maxchar = Py_MAX(maxchar, writer->maxchar);
-            newbuffer = PyUnicode_New(newlen, maxchar);
-            if (newbuffer == NULL)
-                return -1;
-            _PyUnicode_FastCopyCharacters(newbuffer, 0,
-                                          writer->buffer, 0, writer->pos);
-            Py_DECREF(writer->buffer);
-            writer->readonly = 0;
-        }
-        else {
-            newbuffer = resize_compact(writer->buffer, newlen);
-            if (newbuffer == NULL)
-                return -1;
-        }
-        writer->buffer = newbuffer;
-    }
-    else if (maxchar > writer->maxchar) {
-        assert(!writer->readonly);
-        newbuffer = PyUnicode_New(writer->size, maxchar);
-        if (newbuffer == NULL)
-            return -1;
-        _PyUnicode_FastCopyCharacters(newbuffer, 0,
-                                      writer->buffer, 0, writer->pos);
-        Py_SETREF(writer->buffer, newbuffer);
-    }
-    _PyUnicodeWriter_Update(writer);
-    return 0;
-
-#undef OVERALLOCATE_FACTOR
-}
-
-int
-_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
-                                     int kind)
-{
-    Py_UCS4 maxchar;
-
-    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
-    assert(writer->kind < kind);
-
-    switch (kind)
-    {
-    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
-    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
-    case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
-    default:
-        Py_UNREACHABLE();
-    }
-
-    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
-}
-
-int
-_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
-{
-    return _PyUnicodeWriter_WriteCharInline(writer, ch);
-}
-
-int
-PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
-{
-    if (ch > MAX_UNICODE) {
-        PyErr_SetString(PyExc_ValueError,
-                        "character must be in range(0x110000)");
-        return -1;
-    }
-
-    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
-}
-
-int
-_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
-{
-    assert(PyUnicode_Check(str));
-
-    Py_UCS4 maxchar;
-    Py_ssize_t len;
-
-    len = PyUnicode_GET_LENGTH(str);
-    if (len == 0)
-        return 0;
-    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
-    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
-        if (writer->buffer == NULL && !writer->overallocate) {
-            assert(_PyUnicode_CheckConsistency(str, 1));
-            writer->readonly = 1;
-            writer->buffer = Py_NewRef(str);
-            _PyUnicodeWriter_Update(writer);
-            writer->pos += len;
-            return 0;
-        }
-        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
-            return -1;
-    }
-    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                  str, 0, len);
-    writer->pos += len;
-    return 0;
-}
-
-int
-PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
-{
-    PyTypeObject *type = Py_TYPE(obj);
-    if (type == &PyUnicode_Type) {
-        return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
-    }
-
-    if (type == &PyLong_Type) {
-        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
-    }
-
-    PyObject *str = PyObject_Str(obj);
-    if (str == NULL) {
-        return -1;
-    }
-
-    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
-    Py_DECREF(str);
-    return res;
-}
-
-
-int
-PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
-{
-    if (Py_TYPE(obj) == &PyLong_Type) {
-        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
-    }
-
-    PyObject *repr = PyObject_Repr(obj);
-    if (repr == NULL) {
-        return -1;
-    }
-
-    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
-    Py_DECREF(repr);
-    return res;
-}
-
-
-int
-_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
-                                Py_ssize_t start, Py_ssize_t end)
-{
-    assert(0 <= start);
-    assert(end <= PyUnicode_GET_LENGTH(str));
-    assert(start <= end);
-
-    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
-        return _PyUnicodeWriter_WriteStr(writer, str);
-
-    Py_ssize_t len = end - start;
-    if (len == 0) {
-        return 0;
-    }
-
-    Py_UCS4 maxchar;
-    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
-        maxchar = _PyUnicode_FindMaxChar(str, start, end);
-    }
-    else {
-        maxchar = writer->maxchar;
-    }
-    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
-        return -1;
-    }
-
-    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                  str, start, len);
-    writer->pos += len;
-    return 0;
-}
-
-
-int
-PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
-                               Py_ssize_t start, Py_ssize_t end)
-{
-    if (!PyUnicode_Check(str)) {
-        PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
-        return -1;
-    }
-    if (start < 0 || start > end) {
-        PyErr_Format(PyExc_ValueError, "invalid start argument");
-        return -1;
-    }
-    if (end > PyUnicode_GET_LENGTH(str)) {
-        PyErr_Format(PyExc_ValueError, "invalid end argument");
-        return -1;
-    }
-
-    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
-                                           start, end);
-}
-
-
-int
-_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
-                                  const char *ascii, Py_ssize_t len)
-{
-    if (len == -1)
-        len = strlen(ascii);
-
-    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
-
-    if (writer->buffer == NULL && !writer->overallocate) {
-        PyObject *str;
-
-        str = _PyUnicode_FromASCII(ascii, len);
-        if (str == NULL)
-            return -1;
-
-        writer->readonly = 1;
-        writer->buffer = str;
-        _PyUnicodeWriter_Update(writer);
-        writer->pos += len;
-        return 0;
-    }
-
-    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
-        return -1;
-
-    switch (writer->kind)
-    {
-    case PyUnicode_1BYTE_KIND:
-    {
-        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
-        Py_UCS1 *data = writer->data;
-
-        memcpy(data + writer->pos, str, len);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND:
-    {
-        _PyUnicode_CONVERT_BYTES(
-            Py_UCS1, Py_UCS2,
-            ascii, ascii + len,
-            (Py_UCS2 *)writer->data + writer->pos);
-        break;
-    }
-    case PyUnicode_4BYTE_KIND:
-    {
-        _PyUnicode_CONVERT_BYTES(
-            Py_UCS1, Py_UCS4,
-            ascii, ascii + len,
-            (Py_UCS4 *)writer->data + writer->pos);
-        break;
-    }
-    default:
-        Py_UNREACHABLE();
-    }
-
-    writer->pos += len;
-    return 0;
-}
-
-
-int
-PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
-                           const char *str,
-                           Py_ssize_t size)
-{
-    assert(writer != NULL);
-    _Py_AssertHoldsTstate();
-
-    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
-    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
-}
-
-
-int
-PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
-                          const char *str,
-                          Py_ssize_t size)
-{
-    if (size < 0) {
-        size = strlen(str);
-    }
-
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
-    Py_ssize_t old_pos = _writer->pos;
-    int res = unicode_decode_utf8_writer(_writer, str, size,
-                                         _Py_ERROR_STRICT, NULL, NULL);
-    if (res < 0) {
-        _writer->pos = old_pos;
-    }
-    return res;
-}
-
-
-int
-PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
-                                   const char *string,
-                                   Py_ssize_t length,
-                                   const char *errors,
-                                   Py_ssize_t *consumed)
-{
-    if (length < 0) {
-        length = strlen(string);
-    }
-
-    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
-    Py_ssize_t old_pos = _writer->pos;
-    int res = unicode_decode_utf8_writer(_writer, string, length,
-                                         _Py_ERROR_UNKNOWN, errors, consumed);
-    if (res < 0) {
-        _writer->pos = old_pos;
-        if (consumed) {
-            *consumed = 0;
-        }
-    }
-    return res;
-}
-
-
-int
-_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
-                                   const char *str, Py_ssize_t len)
-{
-    Py_UCS4 maxchar;
-
-    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
-    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
-        return -1;
-    unicode_write_cstr(writer->buffer, writer->pos, str, len);
-    writer->pos += len;
-    return 0;
-}
-
-PyObject *
-_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
-{
-    PyObject *str;
-
-    if (writer->pos == 0) {
-        Py_CLEAR(writer->buffer);
-        _Py_RETURN_UNICODE_EMPTY();
-    }
-
-    str = writer->buffer;
-    writer->buffer = NULL;
-
-    if (writer->readonly) {
-        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
-        return str;
-    }
-
-    if (PyUnicode_GET_LENGTH(str) != writer->pos) {
-        PyObject *str2;
-        str2 = resize_compact(str, writer->pos);
-        if (str2 == NULL) {
-            Py_DECREF(str);
-            return NULL;
-        }
-        str = str2;
-    }
-
-    assert(_PyUnicode_CheckConsistency(str, 1));
-    return unicode_result(str);
-}
-
-
-PyObject*
-PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
-{
-    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
-    assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
-    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
-    return str;
-}
-
-
-void
-_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
-{
-    Py_CLEAR(writer->buffer);
-}
-
 #include "stringlib/unicode_format.h"
 
 PyDoc_STRVAR(format__doc__,
@@ -14456,7 +13847,7 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
 {
     PyObject *unicode;
     if (x == NULL) {
-        unicode = unicode_get_empty();
+        unicode = _PyUnicode_GetEmpty();
     }
     else if (encoding == NULL && errors == NULL) {
         unicode = PyObject_Str(x);
@@ -14510,7 +13901,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args,
         return NULL;
     }
     if (nargs == 0) {
-        return unicode_get_empty();
+        return _PyUnicode_GetEmpty();
     }
     PyObject *object = args[0];
     if (nargs == 1) {
@@ -15186,7 +14577,7 @@ unicodeiter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
     if (it->it_seq != NULL) {
         return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
     } else {
-        PyObject *u = unicode_get_empty();
+        PyObject *u = _PyUnicode_GetEmpty();
         if (u == NULL) {
             Py_XDECREF(iter);
             return NULL;
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index 02b6f35798f845..e65f201623fbbe 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -167,6 +167,7 @@
     <ClCompile Include="..\Objects\unicode_format.c" />
     <ClCompile Include="..\Objects\unicodectype.c" />
     <ClCompile Include="..\Objects\unicode_formatter.c" />
+    <ClCompile Include="..\Objects\unicode_writer.c" />
     <ClCompile Include="..\Objects\unicodeobject.c" />
     <ClCompile Include="..\Objects\unionobject.c" />
     <ClCompile Include="..\Objects\weakrefobject.c" />
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 39462a6380cd21..a9fb6f2328ad95 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -490,6 +490,9 @@
     <ClCompile Include="..\Objects\unicode_formatter.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Objects\unicode_writer.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\Objects\unicodeobject.c">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index a101c1b45cf25c..5e101ee1d0e697 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -562,6 +562,7 @@
     <ClCompile Include="..\Objects\unicode_format.c" />
     <ClCompile Include="..\Objects\unicodectype.c" />
     <ClCompile Include="..\Objects\unicode_formatter.c" />
+    <ClCompile Include="..\Objects\unicode_writer.c" />
     <ClCompile Include="..\Objects\unicodeobject.c" />
     <ClCompile Include="..\Objects\unionobject.c" />
     <ClCompile Include="..\Objects\weakrefobject.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index e3f261c2b92ab9..3d20ea41cd2476 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1283,6 +1283,9 @@
     <ClCompile Include="..\Objects\unicode_formatter.c">
       <Filter>Objects</Filter>
     </ClCompile>
+    <ClCompile Include="..\Objects\unicode_writer.c">
+      <Filter>Objects</Filter>
+    </ClCompile>
     <ClCompile Include="..\Objects\unicodeobject.c">
       <Filter>Objects</Filter>
     </ClCompile>

From 6826166280d6518441a729b444173db205c4ab20 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 30 Oct 2025 15:55:39 +0200
Subject: [PATCH 11/13] gh-135801: Improve filtering by module in
 warn_explicit() without module argument (GH-140151)

* Try to match the module name pattern with module names constructed
  starting from different parent directories of the filename.
  E.g., for "/path/to/package/module" try to match with
  "path.to.package.module", "to.package.module", "package.module" and
  "module".
* Ignore trailing "/__init__.py".
* Ignore trailing ".pyw" on Windows.
* Keep matching with the full filename (without optional ".py" extension)
  for compatibility.
* Only ignore the case of the ".py" extension on Windows.
---
 Doc/library/warnings.rst                      | 13 +++-
 Doc/whatsnew/3.15.rst                         | 12 +++
 Lib/_py_warnings.py                           | 46 ++++++++++--
 Lib/test/test_ast/test_ast.py                 | 14 ++++
 Lib/test/test_builtin.py                      | 22 ++++++
 Lib/test/test_cmd_line_script.py              | 13 ++++
 Lib/test/test_compile.py                      | 14 ++++
 Lib/test/test_import/__init__.py              | 34 ++++++++-
 Lib/test/test_import/data/syntax_warnings.py  | 21 ++++++
 Lib/test/test_symtable.py                     | 15 ++++
 Lib/test/test_warnings/__init__.py            | 31 +++++---
 ...-10-16-17-17-20.gh-issue-135801.faH3fa.rst |  6 ++
 Python/_warnings.c                            | 75 ++++++-------------
 13 files changed, 243 insertions(+), 73 deletions(-)
 create mode 100644 Lib/test/test_import/data/syntax_warnings.py
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-16-17-17-20.gh-issue-135801.faH3fa.rst

diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst
index 03b7a8dc378ef3..2f3cf6008f58e2 100644
--- a/Doc/library/warnings.rst
+++ b/Doc/library/warnings.rst
@@ -487,7 +487,14 @@ Available Functions
    ignored.
 
    *module*, if supplied, should be the module name.
-   If no module is passed, the filename with ``.py`` stripped is used.
+   If no module is passed, the module regular expression in
+   :ref:`warnings filter <warning-filter>` will be tested against the module
+   names constructed from the path components starting from all parent
+   directories (with ``/__init__.py``, ``.py`` and, on Windows, ``.pyw``
+   stripped) and against the filename with ``.py`` stripped.
+   For example, when the filename is ``'/path/to/package/module.py'``, it will
+   be tested against ``'path.to.package.module'``, ``'to.package.module'``,
+   ``'package.module'``, ``'module'``, and ``'/path/to/package/module'``.
 
    *registry*, if supplied, should be the ``__warningregistry__`` dictionary
    of the module.
@@ -506,6 +513,10 @@ Available Functions
    .. versionchanged:: 3.6
       Add the *source* parameter.
 
+   .. versionchanged:: next
+      If no module is passed, test the filter regular expression against
+      module names created from the path, not only the path itself.
+
 
 .. function:: showwarning(message, category, filename, lineno, file=None, line=None)
 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index fe9adfe9f730ec..903645fb40bb14 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -611,6 +611,18 @@ unittest
   (Contributed by Garry Cairns in :gh:`134567`.)
 
 
+warnings
+--------
+
+* Improve filtering by module in :func:`warnings.warn_explicit` if no *module*
+  argument is passed.
+  It now tests the module regular expression in the warnings filter not only
+  against the filename with ``.py`` stripped, but also against module names
+  constructed starting from different parent directories of the filename
+  (with ``/__init__.py``, ``.py`` and, on Windows, ``.pyw`` stripped).
+  (Contributed by Serhiy Storchaka in :gh:`135801`.)
+
+
 venv
 ----
 
diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py
index 576a17ea7b8501..91a9f44b201733 100644
--- a/Lib/_py_warnings.py
+++ b/Lib/_py_warnings.py
@@ -520,20 +520,50 @@ def warn(message, category=None, stacklevel=1, source=None,
     )
 
 
+def _match_filename(pattern, filename, *, MS_WINDOWS=(sys.platform == 'win32')):
+    if not filename:  # no filename: match against the '<unknown>' placeholder
+        return pattern.match('<unknown>') is not None
+    if filename[0] == '<' and filename[-1] == '>':  # '<...>' pseudo-filename: match as is
+        return pattern.match(filename) is not None
+
+    is_py = (filename[-3:].lower() == '.py'  # '.py' is case-insensitive on Windows only
+             if MS_WINDOWS else
+             filename.endswith('.py'))
+    if is_py:
+        filename = filename[:-3]  # drop the '.py' suffix
+    if pattern.match(filename):  # for backward compatibility: full path without '.py'
+        return True
+    if MS_WINDOWS:
+        if not is_py and filename[-4:].lower() == '.pyw':
+            filename = filename[:-4]  # drop the '.pyw' suffix (Windows only)
+            is_py = True
+        if is_py and filename[-9:].lower() in (r'\__init__', '/__init__'):
+            filename = filename[:-9]  # package __init__ file: keep only the directory
+        filename = filename.replace('\\', '/')  # normalize separators to '/'
+    else:
+        if is_py and filename.endswith('/__init__'):
+            filename = filename[:-9]  # package __init__ file: keep only the directory
+    filename = filename.replace('/', '.')  # path separators become dots of a module name
+    i = 0  # offset in filename where the current candidate name starts
+    while True:
+        if pattern.match(filename, i):
+            return True
+        i = filename.find('.', i) + 1  # next candidate starts after the next '.'
+        if not i:  # find() returned -1: no '.' left, so no candidate matched
+            return False
+
+
 def warn_explicit(message, category, filename, lineno,
                   module=None, registry=None, module_globals=None,
                   source=None):
     lineno = int(lineno)
-    if module is None:
-        module = filename or "<unknown>"
-        if module[-3:].lower() == ".py":
-            module = module[:-3] # XXX What about leading pathname?
     if isinstance(message, Warning):
         text = str(message)
         category = message.__class__
     else:
         text = message
         message = category(message)
+    modules = None
     key = (text, category, lineno)
     with _wm._lock:
         if registry is None:
@@ -549,9 +579,11 @@ def warn_explicit(message, category, filename, lineno,
             action, msg, cat, mod, ln = item
             if ((msg is None or msg.match(text)) and
                 issubclass(category, cat) and
-                (mod is None or mod.match(module)) and
-                (ln == 0 or lineno == ln)):
-                break
+                (ln == 0 or lineno == ln) and
+                (mod is None or (_match_filename(mod, filename)
+                                 if module is None else
+                                 mod.match(module)))):
+                    break
         else:
             action = _wm.defaultaction
         # Early exit actions
diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py
index a979a4b1da1ad1..551de5851daace 100644
--- a/Lib/test/test_ast/test_ast.py
+++ b/Lib/test/test_ast/test_ast.py
@@ -13,6 +13,7 @@
 import textwrap
 import types
 import unittest
+import warnings
 import weakref
 from io import StringIO
 from pathlib import Path
@@ -1069,6 +1070,19 @@ def test_tstring(self):
         self.assertIsInstance(tree.body[0].value.values[0], ast.Constant)
         self.assertIsInstance(tree.body[0].value.values[1], ast.Interpolation)
 
+    def test_filter_syntax_warnings_by_module(self):
+        filename = support.findfile('test_import/data/syntax_warnings.py')
+        with open(filename, 'rb') as f:
+            source = f.read()
+        with warnings.catch_warnings(record=True) as wlog:
+            warnings.simplefilter('error')  # warnings the module filter misses become errors
+            warnings.filterwarnings('always', module=r'<unknown>\z')
+            ast.parse(source)  # no filename is passed to ast.parse()
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10])  # the Parser/ sections of the data file
+        for wm in wlog:
+            self.assertEqual(wm.filename, '<unknown>')
+            self.assertIs(wm.category, SyntaxWarning)
+
 
 class CopyTests(unittest.TestCase):
     """Test copying and pickling AST nodes."""
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index fe3e391a7f5ba1..fba46af6617640 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -1088,6 +1088,28 @@ def four_freevars():
             three_freevars.__globals__,
             closure=my_closure)
 
+    def test_exec_filter_syntax_warnings_by_module(self):
+        filename = support.findfile('test_import/data/syntax_warnings.py')
+        with open(filename, 'rb') as f:
+            source = f.read()
+        with warnings.catch_warnings(record=True) as wlog:
+            warnings.simplefilter('error')  # warnings the module filter misses become errors
+            warnings.filterwarnings('always', module=r'<string>\z')
+            exec(source, {})  # empty globals: neither __name__ nor __file__ is set
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21])  # every line marked in the data file
+        for wm in wlog:
+            self.assertEqual(wm.filename, '<string>')
+            self.assertIs(wm.category, SyntaxWarning)
+
+        with warnings.catch_warnings(record=True) as wlog:
+            warnings.simplefilter('error')
+            warnings.filterwarnings('always', module=r'<string>\z')
+            exec(source, {'__name__': 'package.module', '__file__': filename})  # same source, globals now name a module and file
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21])
+        for wm in wlog:
+            self.assertEqual(wm.filename, '<string>')  # still '<string>' despite __file__ in the globals
+            self.assertIs(wm.category, SyntaxWarning)
+
 
     def test_filter(self):
         self.assertEqual(list(filter(lambda c: 'a' <= c <= 'z', 'Hello World')), list('elloorld'))
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 784c45aa96f8a7..f8115cc8300df7 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -810,6 +810,19 @@ def test_script_as_dev_fd(self):
                 out, err = p.communicate()
                 self.assertEqual(out, b"12345678912345678912345\n")
 
+    def test_filter_syntax_warnings_by_module(self):
+        filename = support.findfile('test_import/data/syntax_warnings.py')
+        rc, out, err = assert_python_ok(
+            '-Werror',
+            '-Walways:::test.test_import.data.syntax_warnings',  # module field: full dotted name
+            filename)
+        self.assertEqual(err.count(b': SyntaxWarning: '), 6)  # one per line marked in the data file
+
+        rc, out, err = assert_python_ok(
+            '-Werror',
+            '-Walways:::syntax_warnings',  # module field: last path component only
+            filename)
+        self.assertEqual(err.count(b': SyntaxWarning: '), 6)  # same count as with the full dotted name
 
 
 def tearDownModule():
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 846d38ae561fc5..9c2364491fe08d 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1745,6 +1745,20 @@ def test_compile_warning_in_finally(self):
             self.assertEqual(wm.category, SyntaxWarning)
             self.assertIn("\"is\" with 'int' literal", str(wm.message))
 
+    def test_filter_syntax_warnings_by_module(self):
+        filename = support.findfile('test_import/data/syntax_warnings.py')
+        with open(filename, 'rb') as f:
+            source = f.read()
+        module_re = r'test\.test_import\.data\.syntax_warnings\z'  # dotted name; \z anchors at the end
+        with warnings.catch_warnings(record=True) as wlog:
+            warnings.simplefilter('error')  # warnings the module filter misses become errors
+            warnings.filterwarnings('always', module=module_re)
+            compile(source, filename, 'exec')  # the real path is passed as the filename
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21])  # every line marked in the data file
+        for wm in wlog:
+            self.assertEqual(wm.filename, filename)
+            self.assertIs(wm.category, SyntaxWarning)
+
     @support.subTests('src', [
         textwrap.dedent("""
             def f():
diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py
index 95121debbbfa74..072021e595975a 100644
--- a/Lib/test/test_import/__init__.py
+++ b/Lib/test/test_import/__init__.py
@@ -15,6 +15,7 @@
 import os
 import py_compile
 import random
+import re
 import shutil
 import stat
 import subprocess
@@ -23,6 +24,7 @@
 import threading
 import time
 import types
+import warnings
 import unittest
 from unittest import mock
 import _imp
@@ -51,7 +53,7 @@
     TESTFN, rmtree, temp_umask, TESTFN_UNENCODABLE)
 from test.support import script_helper
 from test.support import threading_helper
-from test.test_importlib.util import uncache
+from test.test_importlib.util import uncache, temporary_pycache_prefix
 from types import ModuleType
 try:
     import _testsinglephase
@@ -412,7 +414,6 @@ def test_from_import_missing_attr_path_is_canonical(self):
         self.assertIsNotNone(cm.exception)
 
     def test_from_import_star_invalid_type(self):
-        import re
         with ready_to_import() as (name, path):
             with open(path, 'w', encoding='utf-8') as f:
                 f.write("__all__ = [b'invalid_type']")
@@ -1250,6 +1251,35 @@ class Spec2:
                 origin = "a\x00b"
             _imp.create_dynamic(Spec2())
 
+    def test_filter_syntax_warnings_by_module(self):
+        module_re = r'test\.test_import\.data\.syntax_warnings\z'  # first pass: full dotted module name
+        unload('test.test_import.data.syntax_warnings')
+        with (os_helper.temp_dir() as tmpdir,
+              temporary_pycache_prefix(tmpdir),
+              warnings.catch_warnings(record=True) as wlog):
+            warnings.simplefilter('error')  # warnings the module filter misses become errors
+            warnings.filterwarnings('always', module=module_re)
+            import test.test_import.data.syntax_warnings
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21])  # every line marked in the data file
+        filename = test.test_import.data.syntax_warnings.__file__
+        for wm in wlog:
+            self.assertEqual(wm.filename, filename)
+            self.assertIs(wm.category, SyntaxWarning)
+
+        module_re = r'syntax_warnings\z'  # second pass: last component of the module name only
+        unload('test.test_import.data.syntax_warnings')
+        with (os_helper.temp_dir() as tmpdir,
+              temporary_pycache_prefix(tmpdir),
+              warnings.catch_warnings(record=True) as wlog):
+            warnings.simplefilter('error')
+            warnings.filterwarnings('always', module=module_re)
+            import test.test_import.data.syntax_warnings
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21])  # same lines as the first pass
+        filename = test.test_import.data.syntax_warnings.__file__
+        for wm in wlog:
+            self.assertEqual(wm.filename, filename)
+            self.assertIs(wm.category, SyntaxWarning)
+
 
 @skip_if_dont_write_bytecode
 class FilePermissionTests(unittest.TestCase):
diff --git a/Lib/test/test_import/data/syntax_warnings.py b/Lib/test/test_import/data/syntax_warnings.py
new file mode 100644
index 00000000000000..103f07b6187603
--- /dev/null
+++ b/Lib/test/test_import/data/syntax_warnings.py
@@ -0,0 +1,21 @@
+# Syntax warnings emitted in different parts of the Python compiler.
+
+# Parser/lexer/lexer.c
+x = 1or 0  # line 4
+
+# Parser/tokenizer/helpers.c
+'\z'  # line 7
+
+# Parser/string_parser.c
+'\400'  # line 10
+
+# _PyCompile_Warn() in Python/codegen.c
+assert(x, 'message')  # line 13
+x is 1  # line 14
+
+# _PyErr_EmitSyntaxWarning() in Python/ast_preprocess.c
+def f():
+    try:
+        pass
+    finally:
+        return 42  # line 21
diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py
index 943e63fc13c921..ef2c00e04b820c 100644
--- a/Lib/test/test_symtable.py
+++ b/Lib/test/test_symtable.py
@@ -5,6 +5,7 @@
 import re
 import textwrap
 import symtable
+import warnings
 import unittest
 
 from test import support
@@ -586,6 +587,20 @@ def test__symtable_refleak(self):
         # check error path when 'compile_type' AC conversion failed
         self.assertRaises(TypeError, symtable.symtable, '', mortal_str, 1)
 
+    def test_filter_syntax_warnings_by_module(self):
+        filename = support.findfile('test_import/data/syntax_warnings.py')
+        with open(filename, 'rb') as f:
+            source = f.read()
+        module_re = r'test\.test_import\.data\.syntax_warnings\z'
+        with warnings.catch_warnings(record=True) as wlog:
+            warnings.simplefilter('error')
+            warnings.filterwarnings('always', module=module_re)
+            symtable.symtable(source, filename, 'exec')
+        self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10])
+        for wm in wlog:
+            self.assertEqual(wm.filename, filename)
+            self.assertIs(wm.category, SyntaxWarning)
+
 
 class ComprehensionTests(unittest.TestCase):
     def get_identifiers_recursive(self, st, res):
diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py
index 157852cfa91007..e6666ddc638037 100644
--- a/Lib/test/test_warnings/__init__.py
+++ b/Lib/test/test_warnings/__init__.py
@@ -249,10 +249,23 @@ def test_filter_module(self):
             self.module.warn_explicit('msg', UserWarning, 'filename', 42,
                                       module='package.module')
             self.assertEqual(len(w), 1)
+            self.module.warn_explicit('msg', UserWarning, '/path/to/package/module', 42)
+            self.assertEqual(len(w), 2)
+            self.module.warn_explicit('msg', UserWarning, '/path/to/package/module.py', 42)
+            self.assertEqual(len(w), 3)
+            self.module.warn_explicit('msg', UserWarning, '/path/to/package/module/__init__.py', 42)
+            self.assertEqual(len(w), 4)
             with self.assertRaises(UserWarning):
-                self.module.warn_explicit('msg', UserWarning, '/path/to/package/module', 42)
-            with self.assertRaises(UserWarning):
-                self.module.warn_explicit('msg', UserWarning, '/path/to/package/module.py', 42)
+                self.module.warn_explicit('msg', UserWarning, '/path/to/package/module/__init__', 42)
+            if MS_WINDOWS:
+                self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.PY', 42)
+                self.assertEqual(len(w), 5)
+                self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module\__INIT__.PY', 42)
+                self.assertEqual(len(w), 6)
+                self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.PYW', 42)
+                self.assertEqual(len(w), 7)
+                self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module\__INIT__.PYW', 42)
+                self.assertEqual(len(w), 8)
 
         with self.module.catch_warnings(record=True) as w:
             self.module.simplefilter('error')
@@ -276,9 +289,8 @@ def test_filter_module(self):
             with self.assertRaises(UserWarning):
                 self.module.warn_explicit('msg', UserWarning, '/PATH/TO/PACKAGE/MODULE', 42)
             if MS_WINDOWS:
-                if self.module is py_warnings:
-                    self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module.PY', 42)
-                    self.assertEqual(len(w), 3)
+                self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module.PY', 42)
+                self.assertEqual(len(w), 3)
                 with self.assertRaises(UserWarning):
                     self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module/__init__.py', 42)
                 with self.assertRaises(UserWarning):
@@ -302,9 +314,8 @@ def test_filter_module(self):
                 self.assertEqual(len(w), 1)
                 self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.py', 42)
                 self.assertEqual(len(w), 2)
-                if self.module is py_warnings:
-                    self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.PY', 42)
-                    self.assertEqual(len(w), 3)
+                self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.PY', 42)
+                self.assertEqual(len(w), 3)
                 with self.assertRaises(UserWarning):
                     self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.pyw', 42)
                 with self.assertRaises(UserWarning):
@@ -399,7 +410,7 @@ def test_message_matching(self):
 
     def test_mutate_filter_list(self):
         class X:
-            def match(self, a):
+            def match(self, a, start=0):
                 L[:] = []
 
         L = [("default",X(),UserWarning,X(),0) for i in range(2)]
diff --git a/Misc/NEWS.d/next/Library/2025-10-16-17-17-20.gh-issue-135801.faH3fa.rst b/Misc/NEWS.d/next/Library/2025-10-16-17-17-20.gh-issue-135801.faH3fa.rst
new file mode 100644
index 00000000000000..d680312d5829fb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-16-17-17-20.gh-issue-135801.faH3fa.rst
@@ -0,0 +1,6 @@
+Improve filtering by module in :func:`warnings.warn_explicit` if no *module*
+argument is passed. It now tests the module regular expression in the
+warnings filter not only against the filename with ``.py`` stripped, but
+also against module names constructed starting from different parent
+directories of the filename (with ``/__init__.py``, ``.py`` and, on Windows,
+``.pyw`` stripped).
diff --git a/Python/_warnings.c b/Python/_warnings.c
index 9989b623dbce3a..d44d414bc93a04 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -171,7 +171,7 @@ _PyWarnings_InitState(PyInterpreterState *interp)
 /*************************************************************************/
 
 static int
-check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
+check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg, PyObject *arg2)
 {
     PyObject *result;
     int rc;
@@ -182,6 +182,9 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
 
     /* An internal plain text default filter must match exactly */
     if (PyUnicode_CheckExact(obj)) {
+        if (arg == NULL) {
+            return 0;
+        }
         int cmp_result = PyUnicode_Compare(obj, arg);
         if (cmp_result == -1 && PyErr_Occurred()) {
             return -1;
@@ -190,10 +193,19 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
     }
 
     /* Otherwise assume a regex filter and call its match() method */
-    result = PyObject_CallMethodOneArg(obj, &_Py_ID(match), arg);
+    if (arg != NULL) {
+        result = PyObject_CallMethodOneArg(obj, &_Py_ID(match), arg);
+    }
+    else {
+        PyObject *match = PyImport_ImportModuleAttrString("_py_warnings", "_match_filename");
+        if (match == NULL) {
+            return -1;
+        }
+        result = PyObject_CallFunctionObjArgs(match, obj, arg2, NULL);
+        Py_DECREF(match);
+    }
     if (result == NULL)
         return -1;
-
     rc = PyObject_IsTrue(result);
     Py_DECREF(result);
     return rc;
@@ -423,7 +435,7 @@ get_default_action(PyInterpreterState *interp)
 static bool
 filter_search(PyInterpreterState *interp, PyObject *category,
               PyObject *text, Py_ssize_t lineno,
-              PyObject *module, char *list_name, PyObject *filters,
+              PyObject *module, PyObject *filename, char *list_name, PyObject *filters,
               PyObject **item, PyObject **matched_action) {
     bool result = true;
     *matched_action = NULL;
@@ -459,14 +471,14 @@ filter_search(PyInterpreterState *interp, PyObject *category,
             break;
         }
 
-        good_msg = check_matched(interp, msg, text);
+        good_msg = check_matched(interp, msg, text, NULL);
         if (good_msg == -1) {
             Py_DECREF(tmp_item);
             result = false;
             break;
         }
 
-        good_mod = check_matched(interp, mod, module);
+        good_mod = check_matched(interp, mod, module, filename);
         if (good_mod == -1) {
             Py_DECREF(tmp_item);
             result = false;
@@ -504,7 +516,7 @@ filter_search(PyInterpreterState *interp, PyObject *category,
 static PyObject*
 get_filter(PyInterpreterState *interp, PyObject *category,
            PyObject *text, Py_ssize_t lineno,
-           PyObject *module, PyObject **item)
+           PyObject *module, PyObject *filename, PyObject **item)
 {
 #ifdef Py_DEBUG
     WarningsState *st = warnings_get_state(interp);
@@ -522,7 +534,7 @@ get_filter(PyInterpreterState *interp, PyObject *category,
         use_global_filters = true;
     } else {
         PyObject *context_action = NULL;
-        if (!filter_search(interp, category, text, lineno, module, "_warnings_context _filters",
+        if (!filter_search(interp, category, text, lineno, module, filename, "_warnings_context _filters",
                            context_filters, item, &context_action)) {
             Py_DECREF(context_filters);
             return NULL;
@@ -541,7 +553,7 @@ get_filter(PyInterpreterState *interp, PyObject *category,
         if (filters == NULL) {
             return NULL;
         }
-        if (!filter_search(interp, category, text, lineno, module, "filters",
+        if (!filter_search(interp, category, text, lineno, module, filename, "filters",
                            filters, item, &action)) {
             return NULL;
         }
@@ -612,39 +624,6 @@ already_warned(PyInterpreterState *interp, PyObject *registry, PyObject *key,
     return 0;
 }
 
-/* New reference. */
-static PyObject *
-normalize_module(PyObject *filename)
-{
-    PyObject *module;
-    int kind;
-    const void *data;
-    Py_ssize_t len;
-
-    len = PyUnicode_GetLength(filename);
-    if (len < 0)
-        return NULL;
-
-    if (len == 0)
-        return PyUnicode_FromString("<unknown>");
-
-    kind = PyUnicode_KIND(filename);
-    data = PyUnicode_DATA(filename);
-
-    /* if filename.endswith(".py"): */
-    if (len >= 3 &&
-        PyUnicode_READ(kind, data, len-3) == '.' &&
-        PyUnicode_READ(kind, data, len-2) == 'p' &&
-        PyUnicode_READ(kind, data, len-1) == 'y')
-    {
-        module = PyUnicode_Substring(filename, 0, len-3);
-    }
-    else {
-        module = Py_NewRef(filename);
-    }
-    return module;
-}
-
 static int
 update_registry(PyInterpreterState *interp, PyObject *registry, PyObject *text,
                 PyObject *category, int add_zero)
@@ -812,15 +791,6 @@ warn_explicit(PyThreadState *tstate, PyObject *category, PyObject *message,
         return NULL;
     }
 
-    /* Normalize module. */
-    if (module == NULL) {
-        module = normalize_module(filename);
-        if (module == NULL)
-            return NULL;
-    }
-    else
-        Py_INCREF(module);
-
     /* Normalize message. */
     Py_INCREF(message);  /* DECREF'ed in cleanup. */
     if (PyObject_TypeCheck(message, (PyTypeObject *)PyExc_Warning)) {
@@ -858,7 +828,7 @@ warn_explicit(PyThreadState *tstate, PyObject *category, PyObject *message,
         /* Else this warning hasn't been generated before. */
     }
 
-    action = get_filter(interp, category, text, lineno, module, &item);
+    action = get_filter(interp, category, text, lineno, module, filename, &item);
     if (action == NULL)
         goto cleanup;
 
@@ -921,7 +891,6 @@ warn_explicit(PyThreadState *tstate, PyObject *category, PyObject *message,
     Py_XDECREF(key);
     Py_XDECREF(text);
     Py_XDECREF(lineno_obj);
-    Py_DECREF(module);
     Py_XDECREF(message);
     return result;  /* Py_None or NULL. */
 }

From a3ce2f77f0813c214896ec66be3a26121f52361e Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Thu, 30 Oct 2025 14:31:47 +0000
Subject: [PATCH 12/13] gh-55531: Implement `normalize_encoding` in C (#136643)

Closes gh-55531
---
 Lib/encodings/__init__.py                     | 14 +---
 ...5-07-14-09-33-17.gh-issue-55531.Gt2e12.rst |  4 ++
 Modules/_codecsmodule.c                       | 42 ++++++++++++
 Modules/clinic/_codecsmodule.c.h              | 66 ++++++++++++++++++-
 Objects/unicodeobject.c                       | 15 +++--
 Python/fileutils.c                            |  4 +-
 6 files changed, 123 insertions(+), 22 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 298177eb8003a7..e7e4ca3358e0f9 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -30,6 +30,7 @@
 
 import codecs
 import sys
+from _codecs import _normalize_encoding
 from . import aliases
 
 _cache = {}
@@ -55,18 +56,7 @@ def normalize_encoding(encoding):
     if isinstance(encoding, bytes):
         encoding = str(encoding, "ascii")
 
-    chars = []
-    punct = False
-    for c in encoding:
-        if c.isalnum() or c == '.':
-            if punct and chars:
-                chars.append('_')
-            if c.isascii():
-                chars.append(c)
-            punct = False
-        else:
-            punct = True
-    return ''.join(chars)
+    return _normalize_encoding(encoding)
 
 def search_function(encoding):
 
diff --git a/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst b/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst
new file mode 100644
index 00000000000000..70e39a4f2c167c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst
@@ -0,0 +1,4 @@
+:mod:`encodings`: Improve :func:`~encodings.normalize_encoding` performance
+by implementing the function in C using the private
+``_Py_normalize_encoding`` which has been modified to make lowercase
+conversion optional.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index bdffeced7da5a9..2f2edbb05ab5c5 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1018,6 +1018,47 @@ _codecs_lookup_error_impl(PyObject *module, const char *name)
     return PyCodec_LookupError(name);
 }
 
+extern int _Py_normalize_encoding(const char *, char *, size_t, int);
+
+/*[clinic input]
+_codecs._normalize_encoding
+    encoding: unicode
+
+Normalize an encoding name *encoding*.
+
+Used for encodings.normalize_encoding. Does not convert to lower case.
+[clinic start generated code]*/
+
+static PyObject *
+_codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
+/*[clinic end generated code: output=d27465d81e361f8e input=3ff3f4d64995b988]*/
+{
+    Py_ssize_t len;
+    const char *cstr = PyUnicode_AsUTF8AndSize(encoding, &len);
+    if (cstr == NULL) {
+        return NULL;
+    }
+
+    /* len + 1 cannot overflow: the UTF-8 buffer returned above already
+       holds len bytes plus a trailing NUL. */
+    size_t size = (size_t)len + 1;
+    char *normalized = PyMem_Malloc(size);
+    if (normalized == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    if (!_Py_normalize_encoding(cstr, normalized, size, 0)) {
+        /* Never return NULL without an exception set. */
+        PyMem_Free(normalized);
+        PyErr_SetString(PyExc_OverflowError, "encoding is too large");
+        return NULL;
+    }
+
+    PyObject *result = PyUnicode_FromString(normalized);
+    PyMem_Free(normalized);
+    return result;
+}
+
 /* --- Module API --------------------------------------------------------- */
 
 static PyMethodDef _codecs_functions[] = {
@@ -1067,6 +1108,7 @@ static PyMethodDef _codecs_functions[] = {
     _CODECS_REGISTER_ERROR_METHODDEF
     _CODECS__UNREGISTER_ERROR_METHODDEF
     _CODECS_LOOKUP_ERROR_METHODDEF
+    _CODECS__NORMALIZE_ENCODING_METHODDEF
     {NULL, NULL}                /* sentinel */
 };
 
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index b0310325759326..9e2a7950ebde64 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -2779,6 +2779,70 @@ _codecs_lookup_error(PyObject *module, PyObject *arg)
     return return_value;
 }
 
+PyDoc_STRVAR(_codecs__normalize_encoding__doc__,
+"_normalize_encoding($module, /, encoding)\n"
+"--\n"
+"\n"
+"Normalize an encoding name *encoding*.\n"
+"\n"
+"Used for encodings.normalize_encoding. Does not convert to lower case.");
+
+#define _CODECS__NORMALIZE_ENCODING_METHODDEF    \
+    {"_normalize_encoding", _PyCFunction_CAST(_codecs__normalize_encoding), METH_FASTCALL|METH_KEYWORDS, _codecs__normalize_encoding__doc__},
+
+static PyObject *
+_codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding);
+
+static PyObject *
+_codecs__normalize_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(encoding), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"encoding", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "_normalize_encoding",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[1];
+    PyObject *encoding;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("_normalize_encoding", "argument 'encoding'", "str", args[0]);
+        goto exit;
+    }
+    encoding = args[0];
+    return_value = _codecs__normalize_encoding_impl(module, encoding);
+
+exit:
+    return return_value;
+}
+
 #ifndef _CODECS_MBCS_DECODE_METHODDEF
     #define _CODECS_MBCS_DECODE_METHODDEF
 #endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */
@@ -2802,4 +2866,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg)
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=ed13f20dfb09e306 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a968c493bb28be3e input=a9049054013a1b77]*/
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1c443e88e05029..4e8c132327b7d0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3449,13 +3449,14 @@ PyUnicode_FromEncodedObject(PyObject *obj,
     return v;
 }
 
-/* Normalize an encoding name: similar to encodings.normalize_encoding(), but
-   also convert to lowercase. Return 1 on success, or 0 on error (encoding is
-   longer than lower_len-1). */
+/* Normalize an encoding name like encodings.normalize_encoding(),
+   and also convert it to lowercase if *to_lower* is true.
+   Return 1 on success, or 0 on error (encoding is longer than lower_len-1). */
 int
 _Py_normalize_encoding(const char *encoding,
                        char *lower,
-                       size_t lower_len)
+                       size_t lower_len,
+                       int to_lower)
 {
     const char *e;
     char *l;
@@ -3486,7 +3487,7 @@ _Py_normalize_encoding(const char *encoding,
             if (l == l_end) {
                 return 0;
             }
-            *l++ = Py_TOLOWER(c);
+            *l++ = to_lower ? Py_TOLOWER(c) : c;
         }
         else {
             punct = 1;
@@ -3521,7 +3522,7 @@ PyUnicode_Decode(const char *s,
     }
 
     /* Shortcuts for common default encodings */
-    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower))) {
+    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower), 1)) {
         char *lower = buflower;
 
         /* Fast paths */
@@ -3778,7 +3779,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     /* Shortcuts for common default encodings */
-    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower))) {
+    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower), 1)) {
         char *lower = buflower;
 
         /* Fast paths */
diff --git a/Python/fileutils.c b/Python/fileutils.c
index b808229716fd9c..93abd70a34d420 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -178,7 +178,7 @@ _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
 
 #define USE_FORCE_ASCII
 
-extern int _Py_normalize_encoding(const char *, char *, size_t);
+extern int _Py_normalize_encoding(const char *, char *, size_t, int);
 
 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
    and POSIX locale. nl_langinfo(CODESET) announces an alias of the
@@ -229,7 +229,7 @@ check_force_ascii(void)
     }
 
     char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
-    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
+    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding), 1)) {
         goto error;
     }
 

From 4e6dba0ef74523a52f66547c16b9972664b18fd4 Mon Sep 17 00:00:00 2001
From: yihong <zouzou0208@gmail.com>
Date: Thu, 30 Oct 2025 23:14:06 +0800
Subject: [PATCH 13/13] gh-139246: zero-width word paste can be wrong in
 default repl (GH-139254)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: grayjk <grayjk@gmail.com>
---
 Lib/_pyrepl/utils.py                          |  6 +++++
 Lib/test/test_pyrepl/test_utils.py            | 23 ++++++++++++++++++-
 ...-09-23-09-46-46.gh-issue-139246.pzfM-w.rst |  1 +
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst

diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py
index 64708e843b685b..06cddef851bb40 100644
--- a/Lib/_pyrepl/utils.py
+++ b/Lib/_pyrepl/utils.py
@@ -63,6 +63,12 @@ class ColorSpan(NamedTuple):
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
     w = unicodedata.east_asian_width(c)
     if w in ("N", "Na", "H", "A"):
         return 1
diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py
index 05a4f329059835..656a1e441e0e47 100644
--- a/Lib/test/test_pyrepl/test_utils.py
+++ b/Lib/test/test_pyrepl/test_utils.py
@@ -5,10 +5,29 @@
 
 class TestUtils(TestCase):
     def test_str_width(self):
-        characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9']
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
         for c in characters:
             self.assertEqual(str_width(c), 1)
 
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(str_width(c), 0)
+
         characters = [chr(99989), chr(99999)]
         for c in characters:
             self.assertEqual(str_width(c), 2)
@@ -25,6 +44,8 @@ def test_wlen(self):
 
         self.assertEqual(wlen('hello'), 5)
         self.assertEqual(wlen('hello' + '\x1a'), 7)
+        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
 
     def test_prev_next_window(self):
         def gen_normal():
diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst
new file mode 100644
index 00000000000000..a816bda5cfe8e8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst
@@ -0,0 +1 @@
+Fix incorrect display width of pasted text containing zero-width characters (such as combining characters and zero-width joiners) in the default REPL.