Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/library/ast.rst
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ Literals
function call).
This has the same meaning as ``FormattedValue.value``.
* ``str`` is a constant containing the text of the interpolation expression.

  If ``str`` is ``None``, :func:`ast.unparse` regenerates the interpolation
  text from ``value`` instead. The regenerated code is not guaranteed to be
  character-for-character identical to the original source; this behavior is
  intended for programmatic code generation.
* ``conversion`` is an integer:

* -1: no conversion
Expand Down
8 changes: 4 additions & 4 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -652,11 +652,11 @@ zlib
Optimizations
=============

module_name
-----------

* TODO
csv
---

* :meth:`csv.Sniffer.sniff` delimiter detection is now up to 1.6x faster.
(Contributed by Maurycy Pawłowski-Wieroński in :gh:`137628`.)


Removed
Expand Down
7 changes: 4 additions & 3 deletions Lib/_ast_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,9 +658,9 @@ def _unparse_interpolation_value(self, inner):
unparser.set_precedence(_Precedence.TEST.next(), inner)
return unparser.visit(inner)

def _write_interpolation(self, node, is_interpolation=False):
def _write_interpolation(self, node, use_str_attr=False):
with self.delimit("{", "}"):
if is_interpolation:
if use_str_attr:
expr = node.str
else:
expr = self._unparse_interpolation_value(node.value)
Expand All @@ -678,7 +678,8 @@ def visit_FormattedValue(self, node):
self._write_interpolation(node)

def visit_Interpolation(self, node):
self._write_interpolation(node, is_interpolation=True)
# If `str` is set to `None`, use the `value` to generate the source code.
self._write_interpolation(node, use_str_attr=node.str is not None)

def visit_Name(self, node):
self.write(node.id)
Expand Down
28 changes: 15 additions & 13 deletions Lib/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,31 +362,33 @@ def _guess_delimiter(self, data, delimiters):
try and evaluate the smallest portion of the data possible, evaluating
additional chunks as necessary.
"""
from collections import Counter, defaultdict

data = list(filter(None, data.split('\n')))

ascii = [chr(c) for c in range(127)] # 7-bit ASCII

# build frequency tables
chunkLength = min(10, len(data))
iteration = 0
charFrequency = {}
num_lines = 0
# {char -> {count_per_line -> num_lines_with_that_count}}
char_frequency = defaultdict(Counter)
modes = {}
delims = {}
start, end = 0, chunkLength
while start < len(data):
iteration += 1
for line in data[start:end]:
for char in ascii:
metaFrequency = charFrequency.get(char, {})
# must count even if frequency is 0
freq = line.count(char)
# value is the mode
metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
charFrequency[char] = metaFrequency

for char in charFrequency.keys():
items = list(charFrequency[char].items())
num_lines += 1
for char, count in Counter(line).items():
if char.isascii():
char_frequency[char][count] += 1

for char, counts in char_frequency.items():
items = list(counts.items())
missed_lines = num_lines - sum(counts.values())
if missed_lines:
# Store the number of lines 'char' was missing from.
items.append((0, missed_lines))
if len(items) == 1 and items[0][0] == 0:
continue
# get the mode of the frequencies
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,14 @@ def test_typecode_u_deprecation(self):
with self.assertWarns(DeprecationWarning):
array.array("u")

def test_empty_string_mem_leak_gh140474(self):
    # Regression test for gh-140474: constructing a 'u' array from an
    # empty string used to leak the temporary buffer on every call, so
    # repeat the construction many times to make a leak observable.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        for _ in range(1000):
            empty = array.array('u', '')
            self.assertEqual(len(empty), 0)
            self.assertEqual(empty.typecode, 'u')


class UCS4Test(UnicodeTest):
typecode = 'w'
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_ast/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,15 @@ class MoreFieldsThanTypes(ast.AST):
self.assertEqual(obj.a, 1)
self.assertEqual(obj.b, 2)

def test_malformed_fields_with_bytes(self):
    # AST node creation must survive a malformed ``_fields`` tuple whose
    # entries are bytes rather than str: a warning, not a crash.
    class BadFields(ast.AST):
        _fields = (b'\xff' * 64,)
        _field_types = {'a': int}

    with self.assertWarnsRegex(DeprecationWarning, r"Field b'\\xff\\xff.*' .*"):
        BadFields()

def test_complete_field_types(self):
class _AllFieldTypes(ast.AST):
_fields = ('a', 'b')
Expand Down
50 changes: 50 additions & 0 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,56 @@ def test_doublequote(self):
dialect = sniffer.sniff(self.sample9)
self.assertTrue(dialect.doublequote)

def test_guess_delimiter_crlf_not_chosen(self):
    # With CRLF line endings "\r" occurs exactly once per line, just like
    # the genuine delimiter "|"; the sniffer must still prefer "|" in the tie.
    sample = "a|b\r\nc|d\r\ne|f\r\n"
    sniffer = csv.Sniffer()
    self.assertEqual(sniffer.sniff(sample).delimiter, "|")
    self.assertNotEqual(sniffer.sniff(sample).delimiter, "\r")

def test_zero_mode_tie_order_independence(self):
    # ":" shows up in only half the rows (per-line counts 1, 0, 1, 0),
    # a tie between the 0-per-line and 1-per-line modes, while ","
    # appears exactly once on every row (the true delimiter).
    #
    # Whether the zero-frequency bucket is appended or inserted first,
    # the tie yields an adjusted score of 0 for ":", so it must never
    # displace "," as the detected delimiter.
    sample = "a,b:c\nd,e\nf,g:c\nh,i\n"
    self.assertEqual(csv.Sniffer().sniff(sample).delimiter, ",")

def test_zero_mode_tie_order_comma_first(self):
    # Rows alternate between "," and ":" (comma on the first row), so
    # neither character is consistent enough to be a delimiter; the
    # sniffer must raise instead of arbitrarily promoting one of them.
    rows = "a,b\nc:d\ne,f\ng:h\n"
    with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
        csv.Sniffer().sniff(rows * 10)

def test_zero_mode_tie_order_colon_first(self):
    # Mirror image of the comma-first case: colon leads, so a result
    # must not depend on which candidate is seen first — sniffing still
    # fails to settle on a delimiter.
    rows = "a:b\nc,d\ne:f\ng,h\n"
    with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"):
        csv.Sniffer().sniff(rows * 10)


class NUL:
def write(s, *args):
pass
Expand Down
19 changes: 14 additions & 5 deletions Lib/test/test_repl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import subprocess
import sys
import unittest
from functools import partial
from textwrap import dedent
from test import support
from test.support import (
Expand All @@ -27,7 +28,7 @@
raise unittest.SkipTest("test module requires subprocess")


def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, custom=False, **kw):
"""Run the Python REPL with the given arguments.

kw is extra keyword args to pass to subprocess.Popen. Returns a Popen
Expand All @@ -41,7 +42,11 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
# path may be used by PyConfig_Get("module_search_paths") to build the
# default module search path.
stdin_fname = os.path.join(os.path.dirname(sys.executable), "<stdin>")
cmd_line = [stdin_fname, '-I', '-i']
cmd_line = [stdin_fname, '-I']
# Don't re-run the built-in REPL from interactive mode
# if we're testing a custom REPL (such as the asyncio REPL).
if not custom:
cmd_line.append('-i')
cmd_line.extend(args)

# Set TERM=vt100, for the rationale see the comments in spawn_python() of
Expand All @@ -55,6 +60,10 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw):
stdout=stdout, stderr=stderr,
**kw)


spawn_asyncio_repl = partial(spawn_repl, "-m", "asyncio", custom=True)


def run_on_interactive_mode(source):
"""Spawn a new Python interpreter, pass the given
input source code from the stdin and return the
Expand Down Expand Up @@ -359,7 +368,7 @@ def f():
class TestAsyncioREPL(unittest.TestCase):
def test_multiple_statements_fail_early(self):
user_input = "1 / 0; print(f'afterwards: {1+1}')"
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
output = kill_python(p)
self.assertIn("ZeroDivisionError", output)
Expand All @@ -371,7 +380,7 @@ def test_toplevel_contextvars_sync(self):
var = ContextVar("var", default="failed")
var.set("ok")
""")
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
user_input2 = dedent("""
print(f"toplevel contextvar test: {var.get()}")
Expand All @@ -387,7 +396,7 @@ def test_toplevel_contextvars_async(self):
from contextvars import ContextVar
var = ContextVar('var', default='failed')
""")
p = spawn_repl("-m", "asyncio")
p = spawn_asyncio_repl()
p.stdin.write(user_input)
user_input2 = "async def set_var(): var.set('ok')\n"
p.stdin.write(user_input2)
Expand Down
100 changes: 91 additions & 9 deletions Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,97 @@ def test_tstrings(self):
self.check_ast_roundtrip("t'foo'")
self.check_ast_roundtrip("t'foo {bar}'")
self.check_ast_roundtrip("t'foo {bar!s:.2f}'")
self.check_ast_roundtrip("t'{a + b}'")
self.check_ast_roundtrip("t'{a + b:x}'")
self.check_ast_roundtrip("t'{a + b!s}'")
self.check_ast_roundtrip("t'{ {a}}'")
self.check_ast_roundtrip("t'{ {a}=}'")
self.check_ast_roundtrip("t'{{a}}'")
self.check_ast_roundtrip("t''")
self.check_ast_roundtrip('t""')
self.check_ast_roundtrip("t'{(lambda x: x)}'")
self.check_ast_roundtrip("t'{t'{x}'}'")

def test_tstring_with_nonsensical_str_field(self):
    # When an Interpolation carries both a `value` and a non-None `str`,
    # unparse trusts `str` even if it contradicts `value`: the node built
    # from `t'{test1}'` here unparses with "test2" instead.
    node = ast.TemplateStr(
        values=[
            ast.Interpolation(
                value=ast.Name(id="test1", ctx=ast.Load()),
                str="test2",
                conversion=-1,
            )
        ]
    )
    self.assertEqual(ast.unparse(node), "t'{test2}'")

def test_tstring_with_none_str_field(self):
    # When Interpolation.str is None, ast.unparse regenerates the
    # interpolation text from the `value` expression instead.

    # A plain name is regenerated verbatim.
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="test1"), str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), "t'{test1}'")

    # A lambda is parenthesized when regenerated.
    lam = ast.Lambda(
        args=ast.arguments(args=[ast.arg(arg="x")]),
        body=ast.Name(id="x"),
    )
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=lam, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), "t'{(lambda x: x)}'")

    # Nested t-string whose inner interpolation keeps its `str` field:
    # the inner `str` ("y") still wins over the inner `value` ("x").
    inner = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="x"), str="y", conversion=-1)]
    )
    tmpl = ast.TemplateStr(
        values=[ast.Interpolation(value=inner, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{t'{y}'}"''')

    # Nested t-string with no `str` anywhere is fully regenerated.
    inner = ast.TemplateStr(
        [ast.Interpolation(value=ast.Name(id="x"), str=None, conversion=-1)]
    )
    tmpl = ast.TemplateStr(
        values=[ast.Interpolation(value=inner, str=None, conversion=-1)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{t'{x}'}"''')

    # A conversion specifier (114 == ord('r')) is still applied.
    tmpl = ast.TemplateStr(
        [ast.Interpolation(value=ast.Constant(value="foo"), str=None, conversion=114)]
    )
    self.assertEqual(ast.unparse(tmpl), '''t"{'foo'!r}"''')

def test_strings(self):
self.check_ast_roundtrip("u'foo'")
Expand Down Expand Up @@ -813,15 +904,6 @@ def test_type_params(self):
self.check_ast_roundtrip("def f[T: int = int, **P = int, *Ts = *int]():\n pass")
self.check_ast_roundtrip("class C[T: int = int, **P = int, *Ts = *int]():\n pass")

def test_tstr(self):
self.check_ast_roundtrip("t'{a + b}'")
self.check_ast_roundtrip("t'{a + b:x}'")
self.check_ast_roundtrip("t'{a + b!s}'")
self.check_ast_roundtrip("t'{ {a}}'")
self.check_ast_roundtrip("t'{ {a}=}'")
self.check_ast_roundtrip("t'{{a}}'")
self.check_ast_roundtrip("t''")


class ManualASTCreationTestCase(unittest.TestCase):
"""Test that AST nodes created without a type_params field unparse correctly."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a crash in Python's :term:`garbage collector <garbage collection>` due to
partially initialized :term:`coroutine` objects when coroutine origin tracking
depth is enabled (:func:`sys.set_coroutine_origin_tracking_depth`).
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix potential buffer overflow in :class:`ast.AST` node initialization when
encountering a malformed :attr:`~ast.AST._fields` tuple containing
non-:class:`str` entries.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.6x.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix memory leak in :class:`array.array` when creating arrays from an empty
:class:`str` and the ``u`` type code.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:func:`ast.unparse` now regenerates the interpolation source from ``value``
when handling :class:`ast.Interpolation` nodes whose ``str`` attribute is
``None``.
3 changes: 3 additions & 0 deletions Modules/arraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_SET_SIZE(self, n);
self->allocated = n;
}
else {
PyMem_Free(ustr);
}
}
else { // c == 'w'
Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
Expand Down
1 change: 1 addition & 0 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ make_gen(PyTypeObject *type, PyFunctionObject *func)
gen->gi_weakreflist = NULL;
gen->gi_exc_state.exc_value = NULL;
gen->gi_exc_state.previous_item = NULL;
gen->gi_iframe.f_executable = PyStackRef_None;
assert(func->func_name != NULL);
gen->gi_name = Py_NewRef(func->func_name);
assert(func->func_qualname != NULL);
Expand Down
Loading
Loading