TechPenguineer · pull · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst
@@ -1082,12 +1082,13 @@ LoggerAdapter Objects
 information into logging calls. For a usage example, see the section on
 :ref:`adding contextual information to your logging output <context-info>`.
 
-.. class:: LoggerAdapter(logger, extra, merge_extra=False)
+.. class:: LoggerAdapter(logger, extra=None, merge_extra=False)
 
    Returns an instance of :class:`LoggerAdapter` initialized with an
-   underlying :class:`Logger` instance, a dict-like object (*extra*), and a
-   boolean (*merge_extra*) indicating whether or not the *extra* argument of
-   individual log calls should be merged with the :class:`LoggerAdapter` extra.
+   underlying :class:`Logger` instance, an optional dict-like object (*extra*),
+   and an optional boolean (*merge_extra*) indicating whether or not
+   the *extra* argument of individual log calls should be merged with
+   the :class:`LoggerAdapter` extra.
    The default behavior is to ignore the *extra* argument of individual log
    calls and only use the one of the :class:`LoggerAdapter` instance
 
@@ -1127,9 +1128,13 @@ information into logging calls. For a usage example, see the section on
       Attribute :attr:`!manager` and method :meth:`!_log` were added, which
       delegate to the underlying logger and allow adapters to be nested.
 
+   .. versionchanged:: 3.10
+
+      The *extra* parameter is now optional.
+
    .. versionchanged:: 3.13
 
-      The *merge_extra* argument was added.
+      The *merge_extra* parameter was added.
 
 
 Thread Safety

diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
@@ -31,7 +31,7 @@ following functions:
    this module.
 
 
-.. function:: lookup(name)
+.. function:: lookup(name, /)
 
    Look up character by name.  If a character with the given name is found, return
    the corresponding character.  If not found, :exc:`KeyError` is raised.
@@ -94,7 +94,7 @@ following functions:
       0.5
 
 
-.. function:: category(chr)
+.. function:: category(chr, /)
 
    Returns the general category assigned to the character *chr* as
    string. General category names consist of two letters.
@@ -106,7 +106,7 @@ following functions:
       'Lu'
 
 
-.. function:: bidirectional(chr)
+.. function:: bidirectional(chr, /)
 
    Returns the bidirectional class assigned to the character *chr* as
    string. If no such value is defined, an empty string is returned.
@@ -118,7 +118,7 @@ following functions:
       'AN'
 
 
-.. function:: combining(chr)
+.. function:: combining(chr, /)
 
    Returns the canonical combining class assigned to the character *chr*
    as integer. Returns ``0`` if no combining class is defined.
@@ -127,14 +127,14 @@ following functions:
    for more information.
 
 
-.. function:: east_asian_width(chr)
+.. function:: east_asian_width(chr, /)
 
    Returns the east asian width assigned to the character *chr* as
    string. For a list of widths and or more information, see the
    `Unicode Standard Annex #11 <https://www.unicode.org/reports/tr11/>`_.
 
 
-.. function:: mirrored(chr)
+.. function:: mirrored(chr, /)
 
    Returns the mirrored property assigned to the character *chr* as
    integer. Returns ``1`` if the character has been identified as a "mirrored"
@@ -144,7 +144,37 @@ following functions:
       1
 
 
-.. function:: decomposition(chr)
+.. function:: isxidstart(chr, /)
+
+   Return ``True`` if *chr* is a valid identifier start per the
+   `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_,
+   that is, it has the ``XID_Start`` property. Return ``False`` otherwise.
+   For example::
+
+      >>> unicodedata.isxidstart('S')
+      True
+      >>> unicodedata.isxidstart('0')
+      False
+
+   .. versionadded:: next
+
+
+.. function:: isxidcontinue(chr, /)
+
+   Return ``True`` if *chr* is a valid identifier character per the
+   `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_,
+   that is, it has the ``XID_Continue`` property. Return ``False`` otherwise.
+   For example::
+
+      >>> unicodedata.isxidcontinue('S')
+      True
+      >>> unicodedata.isxidcontinue(' ')
+      False
+
+   .. versionadded:: next
+
+
+.. function:: decomposition(chr, /)
 
    Returns the character decomposition mapping assigned to the character
    *chr* as string. An empty string is returned in case no such mapping is
@@ -154,7 +184,7 @@ following functions:
       '0041 0303'
 
 
-.. function:: normalize(form, unistr)
+.. function:: normalize(form, unistr, /)
 
    Return the normal form *form* for the Unicode string *unistr*. Valid values for
    *form* are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
@@ -187,7 +217,7 @@ following functions:
    doesn't, they may not compare equal.
 
 
-.. function:: is_normalized(form, unistr)
+.. function:: is_normalized(form, unistr, /)
 
    Return whether the Unicode string *unistr* is in the normal form *form*. Valid
    values for *form* are 'NFC', 'NFKC', 'NFD', and 'NFKD'.

diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst
@@ -487,7 +487,14 @@ Available Functions
    ignored.
 
    *module*, if supplied, should be the module name.
-   If no module is passed, the filename with ``.py`` stripped is used.
+   If no module is passed, the module regular expression in the
+   :ref:`warnings filter <warning-filter>` will be tested against the module
+   names constructed from the path components starting from all parent
+   directories (with ``/__init__.py``, ``.py`` and, on Windows, ``.pyw``
+   stripped) and against the filename with ``.py`` stripped.
+   For example, when the filename is ``'/path/to/package/module.py'``, it will
+   be tested against ``'path.to.package.module'``, ``'to.package.module'``,
+   ``'package.module'``, ``'module'``, and ``'/path/to/package/module'``.
 
    *registry*, if supplied, should be the ``__warningregistry__`` dictionary
    of the module.
@@ -506,6 +513,10 @@ Available Functions
    .. versionchanged:: 3.6
       Add the *source* parameter.
 
+   .. versionchanged:: next
+      If no module is passed, test the filter regular expression against
+      module names created from the path, not only the path itself.
+
 
 .. function:: showwarning(message, category, filename, lineno, file=None, line=None)
 

diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
@@ -611,6 +611,18 @@ unittest
   (Contributed by Garry Cairns in :gh:`134567`.)
 
 
+warnings
+--------
+
+* Improve filtering by module in :func:`warnings.warn_explicit` if no *module*
+  argument is passed.
+  It now tests the module regular expression in the warnings filter not only
+  against the filename with ``.py`` stripped, but also against module names
+  constructed starting from different parent directories of the filename
+  (with ``/__init__.py``, ``.py`` and, on Windows, ``.pyw`` stripped).
+  (Contributed by Serhiy Storchaka in :gh:`135801`.)
+
+
 venv
 ----
 
@@ -794,6 +806,11 @@ unicodedata
 
 * The Unicode database has been updated to Unicode 17.0.0.
 
+* Add :func:`unicodedata.isxidstart` and :func:`unicodedata.isxidcontinue`
+  functions to check whether a character can start or continue a
+  `Unicode Standard Annex #31 <https://www.unicode.org/reports/tr31/>`_ identifier.
+  (Contributed by Stan Ulbrych in :gh:`129117`.)
+
 
 wave
 ----

diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
@@ -49,7 +49,8 @@ extern int _PyAST_Preprocess(
     PyObject *filename,
     int optimize,
     int ff_features,
-    int syntax_check_only);
+    int syntax_check_only,
+    int enable_warnings);
 
 
 typedef struct {

diff --git a/Include/internal/pycore_unicodectype.h b/Include/internal/pycore_unicodectype.h
@@ -0,0 +1,25 @@
+#ifndef Py_INTERNAL_UNICODECTYPE_H
+#define Py_INTERNAL_UNICODECTYPE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
+extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
+extern int _PyUnicode_IsCased(Py_UCS4 ch);
+
+// Export for 'unicodedata' shared extension.
+PyAPI_FUNC(int) _PyUnicode_IsXidStart(Py_UCS4 ch);
+PyAPI_FUNC(int) _PyUnicode_IsXidContinue(Py_UCS4 ch);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_UNICODECTYPE_H */
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
@@ -17,6 +17,46 @@ extern "C" {
 
 
 extern int _PyUnicode_IsModifiable(PyObject *unicode);
+extern void _PyUnicodeWriter_InitWithBuffer(
+    _PyUnicodeWriter *writer,
+    PyObject *buffer);
+extern PyObject* _PyUnicode_Result(PyObject *unicode);
+extern int _PyUnicode_DecodeUTF8Writer(
+    _PyUnicodeWriter *writer,
+    const char *s,
+    Py_ssize_t size,
+    _Py_error_handler error_handler,
+    const char *errors,
+    Py_ssize_t *consumed);
+extern PyObject* _PyUnicode_ResizeCompact(
+    PyObject *unicode,
+    Py_ssize_t length);
+extern PyObject* _PyUnicode_GetEmpty(void);
+
+
+/* Generic helper macro to convert characters of different types.
+   from_type and to_type have to be valid type names, begin and end
+   are pointers to the source characters which should be of type
+   "from_type *".  to is a pointer of type "to_type *" and points to the
+   buffer where the result characters are written to. */
+#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
+    do {                                                \
+        to_type *_to = (to_type *)(to);                 \
+        const from_type *_iter = (const from_type *)(begin);\
+        const from_type *_end = (const from_type *)(end);\
+        Py_ssize_t n = (_end) - (_iter);                \
+        const from_type *_unrolled_end =                \
+            _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
+        while (_iter < (_unrolled_end)) {               \
+            _to[0] = (to_type) _iter[0];                \
+            _to[1] = (to_type) _iter[1];                \
+            _to[2] = (to_type) _iter[2];                \
+            _to[3] = (to_type) _iter[3];                \
+            _iter += 4; _to += 4;                       \
+        }                                               \
+        while (_iter < (_end))                          \
+            *_to++ = (to_type) *_iter++;                \
+    } while (0)
 
 
 static inline void
@@ -74,18 +114,6 @@ _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
     return 0;
 }
 
-
-/* --- Characters Type APIs ----------------------------------------------- */
-
-extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
-extern int _PyUnicode_IsXidContinue(Py_UCS4 ch);
-extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
-extern int _PyUnicode_IsCased(Py_UCS4 ch);
-
 /* --- Unicode API -------------------------------------------------------- */
 
 // Export for '_json' shared extension

diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py
@@ -520,20 +520,50 @@ def warn(message, category=None, stacklevel=1, source=None,
     )
 
 
+def _match_filename(pattern, filename, *, MS_WINDOWS=(sys.platform == 'win32')):
+    if not filename:  # no filename: match the '<unknown>' placeholder
+        return pattern.match('<unknown>') is not None
+    if filename[0] == '<' and filename[-1] == '>':  # '<...>': match as is
+        return pattern.match(filename) is not None
+    # Otherwise treat filename as a path and try module names built from it.
+    is_py = (filename[-3:].lower() == '.py'  # case-insensitive on Windows
+             if MS_WINDOWS else
+             filename.endswith('.py'))
+    if is_py:
+        filename = filename[:-3]  # drop the '.py' suffix
+    if pattern.match(filename):  # for backward compatibility
+        return True
+    if MS_WINDOWS:
+        if not is_py and filename[-4:].lower() == '.pyw':
+            filename = filename[:-4]  # drop the '.pyw' suffix
+            is_py = True
+        if is_py and filename[-9:].lower() in (r'\__init__', '/__init__'):
+            filename = filename[:-9]  # drop the '__init__' component
+        filename = filename.replace('\\', '/')  # normalize separators
+    else:
+        if is_py and filename.endswith('/__init__'):
+            filename = filename[:-9]  # drop the '/__init__' component
+    filename = filename.replace('/', '.')  # path -> dotted name
+    i = 0  # offset in filename where the candidate dotted name starts
+    while True:
+        if pattern.match(filename, i):  # match starting at offset i
+            return True
+        i = filename.find('.', i) + 1  # move just past the next '.'
+        if not i:  # find() returned -1: no more components to drop
+            return False
+
+
 def warn_explicit(message, category, filename, lineno,
                   module=None, registry=None, module_globals=None,
                   source=None):
     lineno = int(lineno)
-    if module is None:
-        module = filename or "<unknown>"
-        if module[-3:].lower() == ".py":
-            module = module[:-3] # XXX What about leading pathname?
     if isinstance(message, Warning):
         text = str(message)
         category = message.__class__
     else:
         text = message
         message = category(message)
+    modules = None
     key = (text, category, lineno)
     with _wm._lock:
         if registry is None:
@@ -549,9 +579,11 @@ def warn_explicit(message, category, filename, lineno,
             action, msg, cat, mod, ln = item
             if ((msg is None or msg.match(text)) and
                 issubclass(category, cat) and
-                (mod is None or mod.match(module)) and
-                (ln == 0 or lineno == ln)):
-                break
+                (ln == 0 or lineno == ln) and
+                (mod is None or (_match_filename(mod, filename)
+                                 if module is None else
+                                 mod.match(module)))):
+                    break
         else:
             action = _wm.defaultaction
         # Early exit actions

diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py
@@ -63,6 +63,12 @@ class ColorSpan(NamedTuple):
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
     w = unicodedata.east_asian_width(c)
     if w in ("N", "Na", "H", "A"):
         return 1