diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst
index ea3ec7d95dc45d..494621672171f2 100644
--- a/Doc/library/ast.rst
+++ b/Doc/library/ast.rst
@@ -363,6 +363,11 @@ Literals
      function call). This has the same meaning as ``FormattedValue.value``.
    * ``str`` is a constant containing the text of the interpolation expression.
+
+     If ``str`` is set to ``None``, then ``value`` is used to generate code
+     when calling :func:`ast.unparse`. In this case, the generated code is
+     not guaranteed to be identical to the original source; this is intended
+     for code generation.
 
    * ``conversion`` is an integer:
 
      * -1: no conversion
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index a9543bdd13e83f..cbca20cba5c284 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -652,11 +652,11 @@ zlib
 
 Optimizations
 =============
 
-module_name
------------
-
-* TODO
+csv
+---
+* :meth:`csv.Sniffer.sniff` delimiter detection is now up to 1.6x faster.
+  (Contributed by Maurycy Pawłowski-Wieroński in :gh:`137628`.)
 
 
 Removed
diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py
index 16cf56f62cc1e5..1c8741b5a55483 100644
--- a/Lib/_ast_unparse.py
+++ b/Lib/_ast_unparse.py
@@ -658,9 +658,9 @@ def _unparse_interpolation_value(self, inner):
         unparser.set_precedence(_Precedence.TEST.next(), inner)
         return unparser.visit(inner)
 
-    def _write_interpolation(self, node, is_interpolation=False):
+    def _write_interpolation(self, node, use_str_attr=False):
         with self.delimit("{", "}"):
-            if is_interpolation:
+            if use_str_attr:
                 expr = node.str
             else:
                 expr = self._unparse_interpolation_value(node.value)
@@ -678,7 +678,8 @@ def visit_FormattedValue(self, node):
         self._write_interpolation(node)
 
     def visit_Interpolation(self, node):
-        self._write_interpolation(node, is_interpolation=True)
+        # If `str` is set to `None`, use `value` to generate the source code.
+        self._write_interpolation(node, use_str_attr=node.str is not None)
 
     def visit_Name(self, node):
         self.write(node.id)
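Illustrative usage (not part of the patch, and assuming a Python build with the 3.14+ t-string AST nodes plus the `_ast_unparse.py` change above): when `Interpolation.str` is `None`, `ast.unparse` regenerates the field from `value`; when `str` is present it still takes priority. The identifiers `answer` and `other` are made up for this sketch.

    import ast

    # `str` is None -> unparse falls back to the `value` expression.
    interp = ast.Interpolation(value=ast.Name(id="answer"), str=None, conversion=-1)
    print(ast.unparse(ast.TemplateStr(values=[interp])))
    # -> t'{answer}'

    # `str` is present -> it wins, even if it disagrees with `value`.
    interp = ast.Interpolation(value=ast.Name(id="answer"), str="other", conversion=-1)
    print(ast.unparse(ast.TemplateStr(values=[interp])))
    # -> t'{other}'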
diff --git a/Lib/csv.py b/Lib/csv.py
index 98eab01429a8ec..b2aaf5fd9fa91e 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -362,31 +362,33 @@ def _guess_delimiter(self, data, delimiters):
         try and evaluate the smallest portion of the data possible, evaluating
         additional chunks as necessary.
         """
+        from collections import Counter, defaultdict
 
         data = list(filter(None, data.split('\n')))
 
-        ascii = [chr(c) for c in range(127)] # 7-bit ASCII
-
         # build frequency tables
         chunkLength = min(10, len(data))
         iteration = 0
-        charFrequency = {}
+        num_lines = 0
+        # {char -> {count_per_line -> num_lines_with_that_count}}
+        char_frequency = defaultdict(Counter)
         modes = {}
         delims = {}
         start, end = 0, chunkLength
         while start < len(data):
             iteration += 1
             for line in data[start:end]:
-                for char in ascii:
-                    metaFrequency = charFrequency.get(char, {})
-                    # must count even if frequency is 0
-                    freq = line.count(char)
-                    # value is the mode
-                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
-                    charFrequency[char] = metaFrequency
-
-            for char in charFrequency.keys():
-                items = list(charFrequency[char].items())
+                num_lines += 1
+                for char, count in Counter(line).items():
+                    if char.isascii():
+                        char_frequency[char][count] += 1
+
+            for char, counts in char_frequency.items():
+                items = list(counts.items())
+                missed_lines = num_lines - sum(counts.values())
+                if missed_lines:
+                    # Store the number of lines 'char' was missing from.
+                    items.append((0, missed_lines))
                 if len(items) == 1 and items[0][0] == 0:
                     continue
                 # get the mode of the frequencies
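A standalone sketch of the table the rewritten loop builds (illustration only, with a made-up `lines` sample; the real code lives inside `Sniffer._guess_delimiter`): each ASCII character maps to a Counter of how many lines contained it exactly N times, and lines that never contained it are folded in afterwards as a zero-count bucket, the same role the `items.append((0, missed_lines))` line plays above.

    from collections import Counter, defaultdict

    lines = ["a,b:c", "d,e", "f,g:c", "h,i"]      # toy sample, one CSV row per item

    # {char -> {count_per_line -> number_of_lines_with_that_count}}
    char_frequency = defaultdict(Counter)
    for line in lines:
        for char, count in Counter(line).items():
            if char.isascii():
                char_frequency[char][count] += 1

    # Fold in the lines where a character never appeared at all.
    num_lines = len(lines)
    for counts in char_frequency.values():
        missed = num_lines - sum(counts.values())
        if missed:
            counts[0] = missed

    print(char_frequency[","])   # Counter({1: 4})       - once on every line
    print(char_frequency[":"])   # Counter({1: 2, 0: 2}) - tie between 0 and 1

Only iterating over the characters that actually occur on each line, instead of over all 127 ASCII codes as the old code did, is where the speedup comes from.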
diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py
index 58ea89c4fac833..83b3c978da3581 100755
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@@ -1255,6 +1255,14 @@ def test_typecode_u_deprecation(self):
         with self.assertWarns(DeprecationWarning):
             array.array("u")
 
+    def test_empty_string_mem_leak_gh140474(self):
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', DeprecationWarning)
+            for _ in range(1000):
+                a = array.array('u', '')
+        self.assertEqual(len(a), 0)
+        self.assertEqual(a.typecode, 'u')
+
 
 class UCS4Test(UnicodeTest):
     typecode = 'w'
diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py
index 1e6f60074308e2..5fdb3a458ae999 100644
--- a/Lib/test/test_ast/test_ast.py
+++ b/Lib/test/test_ast/test_ast.py
@@ -3308,6 +3308,15 @@ class MoreFieldsThanTypes(ast.AST):
         self.assertEqual(obj.a, 1)
         self.assertEqual(obj.b, 2)
 
+    def test_malformed_fields_with_bytes(self):
+        class BadFields(ast.AST):
+            _fields = (b'\xff'*64,)
+            _field_types = {'a': int}
+
+        # This should not crash
+        with self.assertWarnsRegex(DeprecationWarning, r"Field b'\\xff\\xff.*' .*"):
+            obj = BadFields()
+
     def test_complete_field_types(self):
         class _AllFieldTypes(ast.AST):
             _fields = ('a', 'b')
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 50431b562f90ba..6be6a7ae222f02 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -1437,6 +1437,56 @@ def test_doublequote(self):
         dialect = sniffer.sniff(self.sample9)
         self.assertTrue(dialect.doublequote)
 
+    def test_guess_delimiter_crlf_not_chosen(self):
+        # Ensure that we pick the real delimiter ("|") over "\r" in a tie.
+        sniffer = csv.Sniffer()
+        sample = "a|b\r\nc|d\r\ne|f\r\n"
+        self.assertEqual(sniffer.sniff(sample).delimiter, "|")
+        self.assertNotEqual(sniffer.sniff(sample).delimiter, "\r")
+
+    def test_zero_mode_tie_order_independence(self):
+        sniffer = csv.Sniffer()
+        # ":" appears in half the rows (1, 0, 1, 0) - a tie between
+        # 0 and 1 per line.
+        # "," appears once in every row (true delimiter).
+        #
+        # Whether the zero-frequency bucket is appended or inserted first,
+        # the tie yields an adjusted score of 0, so ":" should not be
+        # promoted and "," must be selected.
+        sample = (
+            "a,b:c\n"
+            "d,e\n"
+            "f,g:c\n"
+            "h,i\n"
+        )
+        dialect = sniffer.sniff(sample)
+        self.assertEqual(dialect.delimiter, ",")
+
+    def test_zero_mode_tie_order_comma_first(self):
+        sniffer = csv.Sniffer()
+        pattern = (
+            "a,b\n"
+            "c:d\n"
+            "e,f\n"
+            "g:h\n"
+        )
+        sample = pattern * 10
+        with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
+            sniffer.sniff(sample)
+
+    def test_zero_mode_tie_order_colon_first(self):
+        sniffer = csv.Sniffer()
+        pattern = (
+            "a:b\n"
+            "c,d\n"
+            "e:f\n"
+            "g,h\n"
+        )
+        sample = pattern * 10
+        with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
+            sniffer.sniff(sample)
+
+
 class NUL:
     def write(s, *args):
         pass
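For context, a short usage sketch of what these new tests exercise (the samples mirror the ones above; this is not part of the test suite): `Sniffer.sniff()` reports the consistently repeated character as the delimiter and raises `csv.Error` when no candidate is consistent.

    import csv

    sniffer = csv.Sniffer()

    # "," appears once on every row, ":" only on some rows -> "," wins.
    print(sniffer.sniff("a,b:c\nd,e\nf,g:c\nh,i\n").delimiter)   # ','

    # "," and ":" alternate row by row, so neither is consistent.
    try:
        sniffer.sniff("a,b\nc:d\ne,f\ng:h\n" * 10)
    except csv.Error as exc:
        print(exc)   # Could not determine delimiter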
diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py
index 54e69277282c30..042aa84b35dcf8 100644
--- a/Lib/test/test_repl.py
+++ b/Lib/test/test_repl.py
@@ -5,6 +5,7 @@
 import subprocess
 import sys
 import unittest
+from functools import partial
 from textwrap import dedent
 from test import support
 from test.support import (
@@ -27,7 +28,7 @@
     raise unittest.SkipTest("test module requires subprocess")
 
 
-def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
+def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, custom=False, **kw):
     """Run the Python REPL with the given arguments.
 
     kw is extra keyword args to pass to subprocess.Popen. Returns a Popen
@@ -41,7 +42,11 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
     # path may be used by PyConfig_Get("module_search_paths") to build the
     # default module search path.
     stdin_fname = os.path.join(os.path.dirname(sys.executable), "<stdin>")
-    cmd_line = [stdin_fname, '-I', '-i']
+    cmd_line = [stdin_fname, '-I']
+    # Don't re-run the built-in REPL from interactive mode
+    # if we're testing a custom REPL (such as the asyncio REPL).
+    if not custom:
+        cmd_line.append('-i')
     cmd_line.extend(args)
 
     # Set TERM=vt100, for the rationale see the comments in spawn_python() of
@@ -55,6 +60,10 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
                             stdout=stdout, stderr=stderr,
                             **kw)
 
+
+spawn_asyncio_repl = partial(spawn_repl, "-m", "asyncio", custom=True)
+
+
 def run_on_interactive_mode(source):
     """Spawn a new Python interpreter, pass the given
     input source code from the stdin and return the
@@ -359,7 +368,7 @@ def f():
 class TestAsyncioREPL(unittest.TestCase):
     def test_multiple_statements_fail_early(self):
         user_input = "1 / 0; print(f'afterwards: {1+1}')"
-        p = spawn_repl("-m", "asyncio")
+        p = spawn_asyncio_repl()
         p.stdin.write(user_input)
         output = kill_python(p)
         self.assertIn("ZeroDivisionError", output)
@@ -371,7 +380,7 @@ def test_toplevel_contextvars_sync(self):
         var = ContextVar("var", default="failed")
         var.set("ok")
         """)
-        p = spawn_repl("-m", "asyncio")
+        p = spawn_asyncio_repl()
         p.stdin.write(user_input)
         user_input2 = dedent("""
         print(f"toplevel contextvar test: {var.get()}")
@@ -387,7 +396,7 @@ def test_toplevel_contextvars_async(self):
         from contextvars import ContextVar
         var = ContextVar('var', default='failed')
         """)
-        p = spawn_repl("-m", "asyncio")
+        p = spawn_asyncio_repl()
         p.stdin.write(user_input)
         user_input2 = "async def set_var(): var.set('ok')\n"
         p.stdin.write(user_input2)
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index 0d6b05bc660b76..35e4652a87b423 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -206,6 +206,97 @@ def test_tstrings(self):
         self.check_ast_roundtrip("t'foo'")
         self.check_ast_roundtrip("t'foo {bar}'")
         self.check_ast_roundtrip("t'foo {bar!s:.2f}'")
+        self.check_ast_roundtrip("t'{a + b}'")
+        self.check_ast_roundtrip("t'{a + b:x}'")
+        self.check_ast_roundtrip("t'{a + b!s}'")
+        self.check_ast_roundtrip("t'{ {a}}'")
+        self.check_ast_roundtrip("t'{ {a}=}'")
+        self.check_ast_roundtrip("t'{{a}}'")
+        self.check_ast_roundtrip("t''")
+        self.check_ast_roundtrip('t""')
+        self.check_ast_roundtrip("t'{(lambda x: x)}'")
+        self.check_ast_roundtrip("t'{t'{x}'}'")
+
+    def test_tstring_with_nonsensical_str_field(self):
+        # `value` suggests that the original code is `t'{test1}'`, but `str` suggests otherwise
+        self.assertEqual(
+            ast.unparse(
+                ast.TemplateStr(
+                    values=[
+                        ast.Interpolation(
+                            value=ast.Name(id="test1", ctx=ast.Load()), str="test2", conversion=-1
+                        )
+                    ]
+                )
+            ),
+            "t'{test2}'",
+        )
+
+    def test_tstring_with_none_str_field(self):
+        self.assertEqual(
+            ast.unparse(
+                ast.TemplateStr(
+                    [ast.Interpolation(value=ast.Name(id="test1"), str=None, conversion=-1)]
+                )
+            ),
+            "t'{test1}'",
+        )
+        self.assertEqual(
+            ast.unparse(
+                ast.TemplateStr(
+                    [
+                        ast.Interpolation(
+                            value=ast.Lambda(
+                                args=ast.arguments(args=[ast.arg(arg="x")]),
+                                body=ast.Name(id="x"),
+                            ),
+                            str=None,
+                            conversion=-1,
+                        )
+                    ]
+                )
+            ),
+            "t'{(lambda x: x)}'",
+        )
+        self.assertEqual(
+            ast.unparse(
+                ast.TemplateStr(
+                    values=[
+                        ast.Interpolation(
+                            value=ast.TemplateStr(
+                                # `str` field kept here
+                                [ast.Interpolation(value=ast.Name(id="x"), str="y", conversion=-1)]
+                            ),
+                            str=None,
+                            conversion=-1,
+                        )
+                    ]
+                )
+            ),
+            '''t"{t'{y}'}"''',
+        )
+        self.assertEqual(
+            ast.unparse(
+                ast.TemplateStr(
+                    values=[
+                        ast.Interpolation(
+                            value=ast.TemplateStr(
+                                [ast.Interpolation(value=ast.Name(id="x"), str=None, conversion=-1)]
+                            ),
+                            str=None,
+                            conversion=-1,
+                        )
+                    ]
+                )
+            ),
+            '''t"{t'{x}'}"''',
+        )
+        self.assertEqual(
+            ast.unparse(ast.TemplateStr(
+                [ast.Interpolation(value=ast.Constant(value="foo"), str=None, conversion=114)]
+            )),
+            '''t"{'foo'!r}"''',
+        )
 
     def test_strings(self):
         self.check_ast_roundtrip("u'foo'")
@@ -813,15 +904,6 @@ def test_type_params(self):
         self.check_ast_roundtrip("def f[T: int = int, **P = int, *Ts = *int]():\n    pass")
         self.check_ast_roundtrip("class C[T: int = int, **P = int, *Ts = *int]():\n    pass")
 
-    def test_tstr(self):
-        self.check_ast_roundtrip("t'{a + b}'")
-        self.check_ast_roundtrip("t'{a + b:x}'")
-        self.check_ast_roundtrip("t'{a + b!s}'")
-        self.check_ast_roundtrip("t'{ {a}}'")
-        self.check_ast_roundtrip("t'{ {a}=}'")
-        self.check_ast_roundtrip("t'{{a}}'")
-        self.check_ast_roundtrip("t''")
-
 
 class ManualASTCreationTestCase(unittest.TestCase):
     """Test that AST nodes created without a type_params field unparse correctly."""
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst
new file mode 100644
index 00000000000000..3d62d210f1f007
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst
@@ -0,0 +1,3 @@
+Fix a crash in Python's :term:`garbage collector <garbage collection>` due to
+partially initialized :term:`coroutine` objects when coroutine origin tracking
+depth is enabled (:func:`sys.set_coroutine_origin_tracking_depth`).
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst
new file mode 100644
index 00000000000000..afa9326fff3aee
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst
@@ -0,0 +1,2 @@
+Fix potential buffer overflow in :class:`ast.AST` node initialization when a
+malformed :attr:`~ast.AST._fields` contains non-:class:`str` entries.
diff --git a/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst b/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst
new file mode 100644
index 00000000000000..855070ed6f4511
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst
@@ -0,0 +1 @@
+Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.6x.
diff --git a/Misc/NEWS.d/next/Library/2025-10-22-20-52-13.gh-issue-140474.xIWlip.rst b/Misc/NEWS.d/next/Library/2025-10-22-20-52-13.gh-issue-140474.xIWlip.rst
new file mode 100644
index 00000000000000..aca4e68b1e5e49
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-22-20-52-13.gh-issue-140474.xIWlip.rst
@@ -0,0 +1,2 @@
+Fix memory leak in :class:`array.array` when creating arrays from an empty
+:class:`str` and the ``u`` type code.
diff --git a/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst b/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst
new file mode 100644
index 00000000000000..e12f789e674454
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst
@@ -0,0 +1,2 @@
+:func:`ast.unparse` now generates full source code when handling
+:class:`ast.Interpolation` nodes that do not have a specified source.
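A quick illustration of the code path the gh-140474 entry refers to (usage sketch only; observable behaviour is unchanged by the fix, which frees a wide-character buffer that used to leak):

    import array
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)   # 'u' is deprecated
        a = array.array('u', '')      # empty source string hits the fixed branch
    print(len(a), a.typecode)         # 0 u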
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
index d97cf7af767ca3..729e085c19f006 100644
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -2833,6 +2833,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
                     Py_SET_SIZE(self, n);
                     self->allocated = n;
                 }
+                else {
+                    PyMem_Free(ustr);
+                }
             }
             else { // c == 'w'
                 Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
diff --git a/Objects/genobject.c b/Objects/genobject.c
index c9ca2f1de51ddc..2371ad16d5c1a6 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -932,6 +932,7 @@ make_gen(PyTypeObject *type, PyFunctionObject *func)
     gen->gi_weakreflist = NULL;
     gen->gi_exc_state.exc_value = NULL;
     gen->gi_exc_state.previous_item = NULL;
+    gen->gi_iframe.f_executable = PyStackRef_None;
     assert(func->func_name != NULL);
     gen->gi_name = Py_NewRef(func->func_name);
     assert(func->func_qualname != NULL);
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index dba20226c3283a..3e252cbc4883d1 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1009,7 +1009,7 @@ def visitModule(self, mod):
         else {
             if (PyErr_WarnFormat(
                     PyExc_DeprecationWarning, 1,
-                    "Field '%U' is missing from %.400s._field_types. "
+                    "Field %R is missing from %.400s._field_types. "
                     "This will become an error in Python 3.15.",
                     name, Py_TYPE(self)->tp_name
                     ) < 0) {
@@ -1044,7 +1044,7 @@ def visitModule(self, mod):
             // simple field (e.g., identifier)
             if (PyErr_WarnFormat(
                     PyExc_DeprecationWarning, 1,
-                    "%.400s.__init__ missing 1 required positional argument: '%U'. "
+                    "%.400s.__init__ missing 1 required positional argument: %R. "
                    "This will become an error in Python 3.15.",
                     Py_TYPE(self)->tp_name, name
                     ) < 0) {
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index 660bc598a4862c..aac24ed7d3c0c5 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -5293,7 +5293,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
         else {
             if (PyErr_WarnFormat(
                     PyExc_DeprecationWarning, 1,
-                    "Field '%U' is missing from %.400s._field_types. "
+                    "Field %R is missing from %.400s._field_types. "
                    "This will become an error in Python 3.15.",
                     name, Py_TYPE(self)->tp_name
                     ) < 0) {
@@ -5328,7 +5328,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
             // simple field (e.g., identifier)
             if (PyErr_WarnFormat(
                     PyExc_DeprecationWarning, 1,
-                    "%.400s.__init__ missing 1 required positional argument: '%U'. "
+                    "%.400s.__init__ missing 1 required positional argument: %R. "
                     "This will become an error in Python 3.15.",
                     Py_TYPE(self)->tp_name, name
                     ) < 0) {
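Finally, a Python-level sketch of what the `%R` change above (and the new test_ast test earlier in this diff) covers; it is an illustration, not part of the patch. A non-`str` entry in a custom node's `_fields` used to be pushed through the `'%U'` format, which assumes a `str` object and could crash; with `%R` the bytes name is simply repr()-ed into the warning text.

    import ast
    import warnings

    class BadFields(ast.AST):
        # Deliberately malformed: the field "name" is bytes, not str.
        _fields = (b'\xff' * 64,)
        _field_types = {'a': int}

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        BadFields()                    # must not crash with the fix applied
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
    for w in caught:
        print(w.category.__name__, str(w.message)[:60])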