From 44fb7c361cb24dcf9989a7a1cfee4f6aad5c81aa Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 3 Jun 2025 05:22:41 +1200 Subject: [PATCH 1/6] gh-134908: Protect `textiowrapper_iternext` with critical section (gh-134910) The `textiowrapper_iternext` function called `_textiowrapper_writeflush`, but did not use a critical section, making it racy in free-threaded builds. --- Lib/test/test_io.py | 31 +++++++++++++++++++ ...-05-30-15-56-19.gh-issue-134908.3a7PxM.rst | 1 + Modules/_io/textio.c | 15 ++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 168e66c5a3f0e0..0c921ffbc2576a 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1062,6 +1062,37 @@ def flush(self): # Silence destructor error R.flush = lambda self: None + @threading_helper.requires_working_threading() + def test_write_readline_races(self): + # gh-134908: Concurrent iteration over a file caused races + thread_count = 2 + write_count = 100 + read_count = 100 + + def writer(file, barrier): + barrier.wait() + for _ in range(write_count): + file.write("x") + + def reader(file, barrier): + barrier.wait() + for _ in range(read_count): + for line in file: + self.assertEqual(line, "") + + with self.open(os_helper.TESTFN, "w+") as f: + barrier = threading.Barrier(thread_count + 1) + reader = threading.Thread(target=reader, args=(f, barrier)) + writers = [threading.Thread(target=writer, args=(f, barrier)) + for _ in range(thread_count)] + with threading_helper.catch_threading_exception() as cm: + with threading_helper.start_threads(writers + [reader]): + pass + self.assertIsNone(cm.exc_type) + + self.assertEqual(os.stat(os_helper.TESTFN).st_size, + write_count * thread_count) + class CIOTest(IOTest): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst new file mode 100644 index 00000000000000..3178f0aaf885f8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-30-15-56-19.gh-issue-134908.3a7PxM.rst @@ -0,0 +1 @@ +Fix crash when iterating over lines in a text file on the :term:`free threaded ` build. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86328e46a7b131..3808ecdceb9b70 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1578,6 +1578,8 @@ _io_TextIOWrapper_detach_impl(textio *self) static int _textiowrapper_writeflush(textio *self) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + if (self->pending_bytes == NULL) return 0; @@ -3173,8 +3175,9 @@ _io_TextIOWrapper_close_impl(textio *self) } static PyObject * -textiowrapper_iternext(PyObject *op) +textiowrapper_iternext_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyObject *line; textio *self = textio_CAST(op); @@ -3210,6 +3213,16 @@ textiowrapper_iternext(PyObject *op) return line; } +static PyObject * +textiowrapper_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = textiowrapper_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] @critical_section @getter From 0cec424af5904b3d23ad6e3c6d1a27f89d238d64 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 21:08:26 +0300 Subject: [PATCH 2/6] gh-66234: Add flag to disable the use of mmap in dbm.gnu (GH-135005) This may harm performance, but improve crash tolerance. --- Doc/library/dbm.rst | 3 +++ Doc/whatsnew/3.15.rst | 4 +++ Lib/test/test_dbm_gnu.py | 27 +++++++++++++++++-- ...5-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst | 3 +++ Modules/_gdbmmodule.c | 8 ++++++ 5 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 6f548fbb1b39d8..39e287b15214e4 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -254,6 +254,9 @@ functionality like crash tolerance. * ``'s'``: Synchronized mode. Changes to the database will be written immediately to the file. * ``'u'``: Do not lock database. + * ``'m'``: Do not use :manpage:`mmap(2)`. + This may harm performance, but improve crash tolerance. + .. versionadded:: next Not all flags are valid for all versions of GDBM. See the :data:`open_flags` member for a list of supported flag characters. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index a27a17afdba2a8..2fe33c4c535919 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -96,6 +96,10 @@ dbm which allow to recover unused free space previously occupied by deleted entries. (Contributed by Andrea Oliveri in :gh:`134004`.) +* Add the ``'m'`` flag for :func:`dbm.gnu.open` which allows to disable + the use of :manpage:`mmap(2)`. + This may harm performance, but improve crash tolerance. + (Contributed by Serhiy Storchaka in :gh:`66234`.) difflib ------- diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 66268c42a300b5..e0b988b7b95bbd 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -74,12 +74,12 @@ def test_flags(self): # Test the flag parameter open() by trying all supported flag modes. all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). - modes = all - set('fsu') + modes = all - set('fsum') for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() - # Test additional flags (presumably "fsu"). + # Test additional flags (presumably "fsum"). flags = all - set('crwn') for mode in modes: for flag in flags: @@ -217,6 +217,29 @@ def test_localized_error(self): create_empty_file(os.path.join(d, 'test')) self.assertRaises(gdbm.error, gdbm.open, filename, 'r') + @unittest.skipUnless('m' in gdbm.open_flags, "requires 'm' in open_flags") + def test_nommap_no_crash(self): + self.g = g = gdbm.open(filename, 'nm') + os.truncate(filename, 0) + + g.get(b'a', b'c') + g.keys() + g.firstkey() + g.nextkey(b'a') + with self.assertRaises(KeyError): + g[b'a'] + with self.assertRaises(gdbm.error): + len(g) + + with self.assertRaises(gdbm.error): + g[b'a'] = b'c' + with self.assertRaises(gdbm.error): + del g[b'a'] + with self.assertRaises(gdbm.error): + g.setdefault(b'a', b'c') + with self.assertRaises(gdbm.error): + g.reorganize() + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst b/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst new file mode 100644 index 00000000000000..1defb9a72e04e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-01-15-13-07.gh-issue-66234.Jw7OdC.rst @@ -0,0 +1,3 @@ +Add the ``'m'`` flag for :func:`dbm.gnu.open` which allows to disable the +use of :manpage:`mmap(2)`. This may harm performance, but improve crash +tolerance. diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index 9c402e20e513b9..6a4939512b22fc 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -813,6 +813,11 @@ dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, case 'u': iflags |= GDBM_NOLOCK; break; +#endif +#ifdef GDBM_NOMMAP + case 'm': + iflags |= GDBM_NOMMAP; + break; #endif default: PyErr_Format(state->gdbm_error, @@ -846,6 +851,9 @@ static const char gdbmmodule_open_flags[] = "rwcn" #endif #ifdef GDBM_NOLOCK "u" +#endif +#ifdef GDBM_NOMMAP + "m" #endif ; From c7051a366996dd2213decb8677c52d1eda0fde6a Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 2 Jun 2025 15:13:08 -0300 Subject: [PATCH 3/6] Remove newline in Doc/c-api/lifecycle.rst for gettext builder (GH-135013) --- Doc/c-api/lifecycle.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst index 0e2ffc096caba7..5a170862a26f44 100644 --- a/Doc/c-api/lifecycle.rst +++ b/Doc/c-api/lifecycle.rst @@ -55,16 +55,14 @@ that must be true for *B* to occur after *A*. .. image:: lifecycle.dot.svg :align: center :class: invert-in-dark-mode - :alt: Diagram showing events in an object's life. Explained in detail - below. + :alt: Diagram showing events in an object's life. Explained in detail below. .. only:: latex .. image:: lifecycle.dot.pdf :align: center :class: invert-in-dark-mode - :alt: Diagram showing events in an object's life. Explained in detail - below. + :alt: Diagram showing events in an object's life. Explained in detail below. .. container:: :name: life-events-graph-description From 7a79f52d83c22f5a9787e590f267325c1175d389 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:25:32 +0300 Subject: [PATCH 4/6] gh-133454: Mark test_queue tests with many threads as bigmem (gh-134575) 50 producer and 50 consumer threads need more than 5GB of memory. --- Lib/test/test_queue.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index 7f4fe357034b71..c855fb8fe2b05a 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -6,7 +6,7 @@ import time import unittest import weakref -from test.support import gc_collect +from test.support import gc_collect, bigmemtest from test.support import import_helper from test.support import threading_helper @@ -963,33 +963,33 @@ def test_order(self): # One producer, one consumer => results appended in well-defined order self.assertEqual(results, inputs) - def test_many_threads(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads(self, size): # Test multiple concurrent put() and get() - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, self.feed, self.consume) + results = self.run_threads(size, q, inputs, self.feed, self.consume) # Multiple consumers without synchronization append the # results in random order self.assertEqual(sorted(results), inputs) - def test_many_threads_nonblock(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_nonblock(self, size): # Test multiple concurrent put() and get(block=False) - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_nonblock) self.assertEqual(sorted(results), inputs) - def test_many_threads_timeout(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_timeout(self, size): # Test multiple concurrent put() and get(timeout=...) - N = 50 q = self.q inputs = list(range(1000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_timeout) self.assertEqual(sorted(results), inputs) From e814f43f2c655b931af8ee9e1c128bd1027549fb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:31:06 +0300 Subject: [PATCH 5/6] gh-74232: Add a note about roundtrip of non-float numerics in CSV (GH-134963) --- Doc/library/csv.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 533cdf13974be6..5297be17bd708e 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -70,7 +70,7 @@ The :mod:`csv` module defines the following functions: section :ref:`csv-fmt-params`. Each row read from the csv file is returned as a list of strings. No - automatic data type conversion is performed unless the ``QUOTE_NONNUMERIC`` format + automatic data type conversion is performed unless the :data:`QUOTE_NONNUMERIC` format option is specified (in which case unquoted fields are transformed into floats). A short usage example:: @@ -331,8 +331,14 @@ The :mod:`csv` module defines the following constants: Instructs :class:`writer` objects to quote all non-numeric fields. - Instructs :class:`reader` objects to convert all non-quoted fields to type *float*. + Instructs :class:`reader` objects to convert all non-quoted fields to type :class:`float`. + .. note:: + Some numeric types, such as :class:`bool`, :class:`~fractions.Fraction`, + or :class:`~enum.IntEnum`, have a string representation that cannot be + converted to :class:`float`. + They cannot be read in the :data:`QUOTE_NONNUMERIC` and + :data:`QUOTE_STRINGS` modes. .. data:: QUOTE_NONE From df98a47a61a274eb7427c6201ddabec9ffd30b0a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 2 Jun 2025 23:35:41 +0300 Subject: [PATCH 6/6] gh-132813: Improve error messages for incorrect types and values of csv.Dialog attributes (GH-133241) Make them similar to PyArg_Parse error messages, mention None as a possible value, show a wrong type and the string length. --- Lib/test/test_csv.py | 48 ++++++++---- ...-05-01-10-56-44.gh-issue-132813.rKurvp.rst | 2 + Modules/_csv.c | 73 ++++++++----------- 3 files changed, 69 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9aace57633b0c6..60feab225a107c 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1122,19 +1122,22 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.quotechar = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be string or None, not int') + '"quotechar" must be a unicode character or None, ' + 'not int') def test_delimiter(self): class mydialect(csv.Dialect): @@ -1151,31 +1154,32 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, ' + 'not a string of length 3') mydialect.delimiter = "" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, not a string of length 0') mydialect.delimiter = b"," with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not bytes') + '"delimiter" must be a unicode character, not bytes') mydialect.delimiter = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not int') + '"delimiter" must be a unicode character, not int') mydialect.delimiter = None with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not NoneType') + '"delimiter" must be a unicode character, not NoneType') def test_escapechar(self): class mydialect(csv.Dialect): @@ -1189,20 +1193,32 @@ class mydialect(csv.Dialect): self.assertEqual(d.escapechar, "\\") mydialect.escapechar = "" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.escapechar = "**" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.escapechar = b"*" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not bytes') mydialect.escapechar = 4 - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not int') def test_lineterminator(self): class mydialect(csv.Dialect): @@ -1223,7 +1239,13 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"lineterminator" must be a string') + '"lineterminator" must be a string, not int') + + mydialect.lineterminator = None + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"lineterminator" must be a string, not NoneType') def test_invalid_chars(self): def create_invalid(field_name, value, **kwargs): diff --git a/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst new file mode 100644 index 00000000000000..55608528a4564b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-01-10-56-44.gh-issue-132813.rKurvp.rst @@ -0,0 +1,2 @@ +Improve error messages for incorrect types and values of :class:`csv.Dialect` +attributes. diff --git a/Modules/_csv.c b/Modules/_csv.c index e5ae853590bf2c..2e04136e0ac657 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -237,7 +237,7 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) int value; if (!PyLong_CheckExact(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an integer", name); + "\"%s\" must be an integer, not %T", name, src); return -1; } value = PyLong_AsInt(src); @@ -255,27 +255,29 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (src == NULL) { *target = dflt; } - else { + else if (src == Py_None) { *target = NOT_SET; - if (src != Py_None) { - if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; - } - Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); - return -1; - } - *target = PyUnicode_READ_CHAR(src, 0); + } + else { + // similar to PyArg_Parse("C?") + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, not %T", + name, src); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; } + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, " + "not a string of length %zd", + name, len); + return -1; + } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } @@ -287,11 +289,12 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { + // similar to PyArg_Parse("C") if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; + "\"%s\" must be a unicode character, not %T", + name, src); + return -1; } Py_ssize_t len = PyUnicode_GetLength(src); if (len < 0) { @@ -299,8 +302,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) } if (len != 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); + "\"%s\" must be a unicode character, " + "not a string of length %zd", + name, len); return -1; } *target = PyUnicode_READ_CHAR(src, 0); @@ -314,16 +318,12 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) if (src == NULL) *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { - if (src == Py_None) - *target = NULL; - else if (!PyUnicode_Check(src)) { + if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a string", name); + "\"%s\" must be a string, not %T", name, src); return -1; } - else { - Py_XSETREF(*target, Py_NewRef(src)); - } + Py_XSETREF(*target, Py_NewRef(src)); } return 0; } @@ -533,11 +533,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) /* validate options */ if (dialect_check_quoting(self->quoting)) goto err; - if (self->delimiter == NOT_SET) { - PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); - goto err; - } if (quotechar == Py_None && quoting == NULL) self->quoting = QUOTE_NONE; if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { @@ -545,10 +540,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "quotechar must be set if quoting enabled"); goto err; } - if (self->lineterminator == NULL) { - PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); - goto err; - } if (dialect_check_char("delimiter", self->delimiter, self, true) || dialect_check_char("escapechar", self->escapechar, self, !self->skipinitialspace) ||