Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/library/ast.rst
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ Literals
function call).
This has the same meaning as ``FormattedValue.value``.
* ``str`` is a constant containing the text of the interpolation expression.

  If ``str`` is ``None``, :func:`ast.unparse` regenerates the interpolation
  text from ``value`` instead. The regenerated code is not guaranteed to be
  character-for-character identical to the original source; this behavior is
  intended for programmatic code generation.
* ``conversion`` is an integer:

* -1: no conversion
Expand Down
8 changes: 4 additions & 4 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -652,11 +652,11 @@ zlib
Optimizations
=============

module_name
-----------

* TODO
csv
---

* :meth:`csv.Sniffer.sniff` delimiter detection is now up to 1.6x faster.
(Contributed by Maurycy Pawłowski-Wieroński in :gh:`137628`.)


Removed
Expand Down
7 changes: 4 additions & 3 deletions Lib/_ast_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,9 +658,9 @@ def _unparse_interpolation_value(self, inner):
unparser.set_precedence(_Precedence.TEST.next(), inner)
return unparser.visit(inner)

def _write_interpolation(self, node, is_interpolation=False):
def _write_interpolation(self, node, use_str_attr=False):
with self.delimit("{", "}"):
if is_interpolation:
if use_str_attr:
expr = node.str
else:
expr = self._unparse_interpolation_value(node.value)
Expand All @@ -678,7 +678,8 @@ def visit_FormattedValue(self, node):
self._write_interpolation(node)

def visit_Interpolation(self, node):
self._write_interpolation(node, is_interpolation=True)
# If `str` is set to `None`, use the `value` to generate the source code.
self._write_interpolation(node, use_str_attr=node.str is not None)

def visit_Name(self, node):
self.write(node.id)
Expand Down
28 changes: 15 additions & 13 deletions Lib/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,31 +362,33 @@ def _guess_delimiter(self, data, delimiters):
try and evaluate the smallest portion of the data possible, evaluating
additional chunks as necessary.
"""
from collections import Counter, defaultdict

data = list(filter(None, data.split('\n')))

ascii = [chr(c) for c in range(127)] # 7-bit ASCII

# build frequency tables
chunkLength = min(10, len(data))
iteration = 0
charFrequency = {}
num_lines = 0
# {char -> {count_per_line -> num_lines_with_that_count}}
char_frequency = defaultdict(Counter)
modes = {}
delims = {}
start, end = 0, chunkLength
while start < len(data):
iteration += 1
for line in data[start:end]:
for char in ascii:
metaFrequency = charFrequency.get(char, {})
# must count even if frequency is 0
freq = line.count(char)
# value is the mode
metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
charFrequency[char] = metaFrequency

for char in charFrequency.keys():
items = list(charFrequency[char].items())
num_lines += 1
for char, count in Counter(line).items():
if char.isascii():
char_frequency[char][count] += 1

for char, counts in char_frequency.items():
items = list(counts.items())
missed_lines = num_lines - sum(counts.values())
if missed_lines:
# Store the number of lines 'char' was missing from.
items.append((0, missed_lines))
if len(items) == 1 and items[0][0] == 0:
continue
# get the mode of the frequencies
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,14 @@ def test_typecode_u_deprecation(self):
with self.assertWarns(DeprecationWarning):
array.array("u")

def test_empty_string_mem_leak_gh140474(self):
    # Regression test for gh-140474: constructing a 'u' array from an
    # empty string used to leak the temporary buffer on every call, so
    # repeat the construction many times to make a leak observable.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        for _ in range(1000):
            empty = array.array('u', '')
            self.assertEqual(len(empty), 0)
            self.assertEqual(empty.typecode, 'u')


class UCS4Test(UnicodeTest):
typecode = 'w'
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_ast/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,15 @@ class MoreFieldsThanTypes(ast.AST):
self.assertEqual(obj.a, 1)
self.assertEqual(obj.b, 2)

def test_malformed_fields_with_bytes(self):
    # AST node creation must survive a malformed ``_fields`` tuple whose
    # entries are bytes rather than str: a warning, not a crash.
    class BadFields(ast.AST):
        _fields = (b'\xff' * 64,)
        _field_types = {'a': int}

    with self.assertWarnsRegex(DeprecationWarning, r"Field b'\\xff\\xff.*' .*"):
        BadFields()

def test_complete_field_types(self):
class _AllFieldTypes(ast.AST):
_fields = ('a', 'b')
Expand Down
50 changes: 50 additions & 0 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,56 @@ def test_doublequote(self):
dialect = sniffer.sniff(self.sample9)
self.assertTrue(dialect.doublequote)

def test_guess_delimiter_crlf_not_chosen(self):
    # With CRLF line endings "\r" occurs exactly once per line, just like
    # the genuine delimiter "|"; the sniffer must still prefer "|" in the tie.
    sample = "a|b\r\nc|d\r\ne|f\r\n"
    sniffer = csv.Sniffer()
    self.assertEqual(sniffer.sniff(sample).delimiter, "|")
    self.assertNotEqual(sniffer.sniff(sample).delimiter, "\r")

def test_zero_mode_tie_order_independence(self):
    # ":" shows up in only half the rows (per-line counts 1, 0, 1, 0),
    # a tie between the 0-per-line and 1-per-line modes, while ","
    # appears exactly once on every row (the true delimiter).
    #
    # Whether the zero-frequency bucket is appended or inserted first,
    # the tie yields an adjusted score of 0 for ":", so it must never
    # displace "," as the detected delimiter.
    sample = "a,b:c\nd,e\nf,g:c\nh,i\n"
    self.assertEqual(csv.Sniffer().sniff(sample).delimiter, ",")

def test_zero_mode_tie_order_comma_first(self):
    # Rows alternate between "," and ":" (comma on the first row), so
    # neither character is consistent enough to be a delimiter; the
    # sniffer must raise instead of arbitrarily promoting one of them.
    rows = "a,b\nc:d\ne,f\ng:h\n"
    with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
        csv.Sniffer().sniff(rows * 10)

def test_zero_mode_tie_order_colon_first(self):
    # Mirror image of the comma-first case: colon leads, so a result
    # must not depend on which candidate is seen first — sniffing still
    # fails to settle on a delimiter.
    rows = "a:b\nc,d\ne:f\ng,h\n"
    with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
        csv.Sniffer().sniff(rows * 10)


class NUL:
def write(s, *args):
pass
Expand Down
19 changes: 14 additions & 5 deletions Lib/test/test_repl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import subprocess
import sys
import unittest
from functools import partial
from textwrap import dedent
from test import support
from test.support import (
Expand All @@ -27,7 +28,7 @@
raise unittest.SkipTest("test module requires subprocess")


def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, custom=False, **kw):
"""Run the Python REPL with the given arguments.

kw is extra keyword args to pass to subprocess.Popen. Returns a Popen
Expand All @@ -41,7 +42,11 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
# path may be used by PyConfig_Get("module_search_paths") to build the
# default module search path.
stdin_fname = os.path.join(os.path.dirname(sys.executable), "<stdin>")
cmd_line = [stdin_fname, '-I', '-i']
cmd_line = [stdin_fname, '-I']
# Don't re-run the built-in REPL from interactive mode
# if we're testing a custom REPL (such as the asyncio REPL).
if not custom:
cmd_line.append('-i')
cmd_line.extend(args)

# Set TERM=vt100, for the rationale see the comments in spawn_python() of
Expand All @@ -55,6 +60,10 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
stdout=stdout, stderr=stderr,
**kw)


spawn_asyncio_repl = partial(spawn_repl, "-m", "asyncio", custom=True)


def run_on_interactive_mode(source):
"""Spawn a new Python interpreter, pass the given
input source code from the stdin and return the
Expand Down Expand Up @@ -359,7 +368,7 @@ def f():
class TestAsyncioREPL(unittest.TestCase):
def test_multiple_statements_fail_early(self):
user_input = "1 / 0; print(f'afterwards: {1+1}')"
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
output = kill_python(p)
self.assertIn("ZeroDivisionError", output)
Expand All @@ -371,7 +380,7 @@ def test_toplevel_contextvars_sync(self):
var = ContextVar("var", default="failed")
var.set("ok")
""")
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
user_input2 = dedent("""
print(f"toplevel contextvar test: {var.get()}")
Expand All @@ -387,7 +396,7 @@ def test_toplevel_contextvars_async(self):
from contextvars import ContextVar
var = ContextVar('var', default='failed')
""")
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
user_input2 = "async def set_var(): var.set('ok')\n"
p.stdin.write(user_input2)
Expand Down
100 changes: 91 additions & 9 deletions Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,97 @@ def test_tstrings(self):
self.check_ast_roundtrip("t'foo'")
self.check_ast_roundtrip("t'foo {bar}'")
self.check_ast_roundtrip("t'foo {bar!s:.2f}'")
self.check_ast_roundtrip("t'{a + b}'")
self.check_ast_roundtrip("t'{a + b:x}'")
self.check_ast_roundtrip("t'{a + b!s}'")
self.check_ast_roundtrip("t'{ {a}}'")
self.check_ast_roundtrip("t'{ {a}=}'")
self.check_ast_roundtrip("t'{{a}}'")
self.check_ast_roundtrip("t''")
self.check_ast_roundtrip('t""')
self.check_ast_roundtrip("t'{(lambda x: x)}'")
self.check_ast_roundtrip("t'{t'{x}'}'")

def test_tstring_with_nonsensical_str_field(self):
    # When an Interpolation carries both a `value` and a non-None `str`,
    # unparse trusts `str` even if it contradicts `value`: the node built
    # from `t'{test1}'` here unparses with "test2" instead.
    node = ast.TemplateStr(
        values=[
            ast.Interpolation(
                value=ast.Name(id="test1", ctx=ast.Load()),
                str="test2",
                conversion=-1,
            )
        ]
    )
    self.assertEqual(ast.unparse(node), "t'{test2}'")

def test_tstring_with_none_str_field(self):
    # When Interpolation.str is None, ast.unparse regenerates the
    # interpolation text from the `value` expression instead.

    # A plain name is regenerated verbatim.
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="test1"), str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), "t'{test1}'")

    # A lambda is parenthesized when regenerated.
    lam = ast.Lambda(
        args=ast.arguments(args=[ast.arg(arg="x")]),
        body=ast.Name(id="x"),
    )
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=lam, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), "t'{(lambda x: x)}'")

    # Nested t-string whose inner interpolation keeps its `str` field:
    # the inner `str` ("y") still wins over the inner `value` ("x").
    inner = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="x"), str="y", conversion=-1)]
    )
    tmpl = ast.TemplateStr(
        values=[ast.Interpolation(value=inner, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{t'{y}'}"''')

    # Nested t-string with no `str` anywhere is fully regenerated.
    inner = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="x"), str=None, conversion=-1)]
    )
    tmpl = ast.TemplateStr(
        values=[ast.Interpolation(value=inner, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{t'{x}'}"''')

    # A conversion specifier (114 == ord('r')) is still applied.
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=ast.Constant(value="foo"), str=None, conversion=114)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{'foo'!r}"''')

def test_strings(self):
self.check_ast_roundtrip("u'foo'")
Expand Down Expand Up @@ -813,15 +904,6 @@ def test_type_params(self):
self.check_ast_roundtrip("def f[T: int = int, **P = int, *Ts = *int]():\n pass")
self.check_ast_roundtrip("class C[T: int = int, **P = int, *Ts = *int]():\n pass")

def test_tstr(self):
self.check_ast_roundtrip("t'{a + b}'")
self.check_ast_roundtrip("t'{a + b:x}'")
self.check_ast_roundtrip("t'{a + b!s}'")
self.check_ast_roundtrip("t'{ {a}}'")
self.check_ast_roundtrip("t'{ {a}=}'")
self.check_ast_roundtrip("t'{{a}}'")
self.check_ast_roundtrip("t''")


class ManualASTCreationTestCase(unittest.TestCase):
"""Test that AST nodes created without a type_params field unparse correctly."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a crash in Python's :term:`garbage collector <garbage collection>` due to
partially initialized :term:`coroutine` objects when coroutine origin tracking
depth is enabled (:func:`sys.set_coroutine_origin_tracking_depth`).
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix potential buffer overflow in :class:`ast.AST` node initialization when
encountering a malformed :attr:`~ast.AST._fields` tuple containing
non-:class:`str` entries.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.6x.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix memory leak in :class:`array.array` when creating arrays from an empty
:class:`str` and the ``u`` type code.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:func:`ast.unparse` now regenerates the interpolation source from ``value``
when handling :class:`ast.Interpolation` nodes whose ``str`` attribute is
``None``.
3 changes: 3 additions & 0 deletions Modules/arraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_SET_SIZE(self, n);
self->allocated = n;
}
else {
PyMem_Free(ustr);
}
}
else { // c == 'w'
Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
Expand Down
1 change: 1 addition & 0 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ make_gen(PyTypeObject *type, PyFunctionObject *func)
gen->gi_weakreflist = NULL;
gen->gi_exc_state.exc_value = NULL;
gen->gi_exc_state.previous_item = NULL;
gen->gi_iframe.f_executable = PyStackRef_None;
assert(func->func_name != NULL);
gen->gi_name = Py_NewRef(func->func_name);
assert(func->func_qualname != NULL);
Expand Down
Loading
Loading