From c07cadccb7ac988131335cac2bea4c2b8c2d9cbe Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 01:45:49 +0200 Subject: [PATCH 01/14] chunk --- Modules/_csv.c | 160 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 14 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 2e04136e0ac657..c9c6c8288b836d 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -722,6 +722,45 @@ parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) return 0; } +static int +parse_add_substring(ReaderObj *self, _csvstate *module_state, + PyObject* lineobj, Py_ssize_t start, Py_ssize_t end) +{ + int kind; + const void *data; + Py_UCS4 *dest; + Py_ssize_t field_limit; + + Py_ssize_t len = end - start; + if (len <= 0) { + return 0; + } + + field_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); + if (self->field_len + len > field_limit) { + PyErr_Format(module_state->error_obj, + "field larger than field limit (%zd)", + field_limit); + return -1; + } + + while (self->field_len + len > self->field_size) { + if (!parse_grow_buff(self)) + return -1; + } + + kind = PyUnicode_KIND(lineobj); + data = PyUnicode_DATA(lineobj); + dest = self->field + self->field_len; + + for (Py_ssize_t i = 0; i < len; ++i) { + dest[i] = PyUnicode_READ(kind, data, start + i); + } + + self->field_len += len; + return 0; +} + static int parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) { @@ -923,11 +962,9 @@ Reader_iternext(PyObject *op) ReaderObj *self = _ReaderObj_CAST(op); PyObject *fields = NULL; - Py_UCS4 c; - Py_ssize_t pos, linelen; - int kind; - const void *data; + Py_ssize_t pos, linelen, chunk_end, p; PyObject *lineobj; + DialectObj *dialect; _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), "Reader.__next__"); @@ -937,13 +974,16 @@ Reader_iternext(PyObject *op) if (parse_reset(self) < 0) return NULL; + + dialect = self->dialect; + do { lineobj = 
PyIter_Next(self->input_iter); if (lineobj == NULL) { /* End of input OR exception */ if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { - if (self->dialect->strict) + if (dialect->strict) PyErr_SetString(module_state->error_obj, "unexpected end of data"); else if (parse_save_field(self) >= 0) @@ -962,17 +1002,109 @@ Reader_iternext(PyObject *op) return NULL; } ++self->line_num; - kind = PyUnicode_KIND(lineobj); - data = PyUnicode_DATA(lineobj); - pos = 0; + linelen = PyUnicode_GET_LENGTH(lineobj); - while (linelen--) { - c = PyUnicode_READ(kind, data, pos); - if (parse_process_char(self, module_state, c) < 0) { - Py_DECREF(lineobj); - goto err; + pos = 0; + + while (pos < linelen) { + switch (self->state) { + case IN_FIELD: + chunk_end = linelen; + + p = PyUnicode_FindChar(lineobj, dialect->delimiter, pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + if (dialect->escapechar != NOT_SET) { + p = PyUnicode_FindChar(lineobj, dialect->escapechar, pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + } + p = PyUnicode_FindChar(lineobj, '\n', pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + p = PyUnicode_FindChar(lineobj, '\r', pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + + if (chunk_end > pos) { + if (parse_add_substring(self, module_state, lineobj, pos, chunk_end) < 0) { + Py_DECREF(lineobj); + goto err; + } + } + pos = chunk_end; + + if (pos < linelen) { + Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + if (parse_process_char(self, module_state, c) < 0) { + Py_DECREF(lineobj); + goto err; + } + pos++; + } + break; + case IN_QUOTED_FIELD: + chunk_end = linelen; + + p = PyUnicode_FindChar(lineobj, 
dialect->quotechar, pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + if (dialect->escapechar != NOT_SET) { + p = PyUnicode_FindChar(lineobj, dialect->escapechar, pos, linelen, 1); + if (p >= 0 && p < chunk_end) { + chunk_end = p; + } else if (p == -2) { + Py_DECREF(lineobj); + goto err; + } + } + + if (chunk_end > pos) { + if (parse_add_substring(self, module_state, lineobj, pos, chunk_end) < 0) { + Py_DECREF(lineobj); + goto err; + } + } + pos = chunk_end; + + if (pos < linelen) { + Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + if (parse_process_char(self, module_state, c) < 0) { + Py_DECREF(lineobj); + goto err; + } + pos++; + } + break; + default: + Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + if (parse_process_char(self, module_state, c) < 0) { + Py_DECREF(lineobj); + goto err; + } + pos++; + break; } - pos++; } Py_DECREF(lineobj); if (parse_process_char(self, module_state, EOL) < 0) From 78878917b2379408ea99406c6d584ccf279ebf78 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:05:36 +0200 Subject: [PATCH 02/14] tests --- Lib/test/test_csv.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 60feab225a107c..55ab9b6c9eb6c0 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -373,6 +373,7 @@ def test_read_oddinputs(self): self.assertRaises(csv.Error, self._read_test, ['"ab"c'], None, strict = 1) self._read_test(['"ab"c'], [['abc']], doublequote = 0) + self._read_test([",,,"], [["", "", "", ""]]) self.assertRaises(csv.Error, self._read_test, [b'abc'], None) @@ -423,6 +424,10 @@ def test_read_escape(self): self._read_test(['a,\0b,c'], [['a', 'b', 'c']], escapechar='\0') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) + # '"abc\" with escapechar='\' -> 
ESCAPE_IN_QUOTED_FIELD + EOL -> '\n' appended + self._read_test(['"abc\\'], [["abc\n"]], escapechar="\\") + with self.assertRaises(csv.Error): + self._read_test(['"abc\\'], None, escapechar="\\", strict=True) def test_read_quoting(self): self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) @@ -513,6 +518,31 @@ def test_read_linenum(self): self.assertRaises(StopIteration, next, r) self.assertEqual(r.line_num, 3) + def test_read_linenum_multiline_record(self): + r = csv.reader(['"a', 'b",c', "d,e"]) + self.assertEqual(next(r), ["ab", "c"]) + self.assertEqual(r.line_num, 2) + self.assertEqual(next(r), ["d", "e"]) + self.assertEqual(r.line_num, 3) + with self.assertRaises(StopIteration): + next(r) + self.assertEqual(r.line_num, 3) + + def test_read_with_unicode_delimiter_and_quotechar(self): + self._read_test(["αλβλγ"], [["α", "β", "γ"]], delimiter="λ") + self._read_test( + ["אαאλאβאλאγא"], [["α", "β", "γ"]], delimiter="λ", quotechar="א" + ) + + # non-BMP + delim, quote = "😂", "😺" + self._read_test( + [f"{quote}a{quote}{delim}{quote}b{quote}"], + [["a", "b"]], + delimiter=delim, + quotechar=quote, + ) + def test_roundtrip_quoteed_newlines(self): rows = [ ['\na', 'b\nc', 'd\n'], From 1516b759a3a88b543611aa1c62ab618b54dfd24f Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:41:58 +0200 Subject: [PATCH 03/14] blurb --- .../next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst diff --git a/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst b/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst new file mode 100644 index 00000000000000..3c9ed740a9707e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst @@ -0,0 +1 @@ +Speed up :class:`~csv.reader` by 1.4x. 
From dabe3c12b7601a0c4a6545eb0c3e60d16271ce50 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:42:11 +0200 Subject: [PATCH 04/14] whatsnew --- Doc/whatsnew/3.15.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 43c40e4d0f3154..6a0a8fd602f428 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -423,11 +423,11 @@ zlib Optimizations ============= -module_name ------------ - -* TODO +csv +--- +* The :meth:`csv.reader` has been optimized, and is around 1.4x faster. + (Contributed by Maurycy Pawłowski-Wieroński in :gh:`XXX`.) Deprecated From e01263b9a372c5f8bdf53df19a7cf2538098446a Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:48:06 +0200 Subject: [PATCH 05/14] correct gh issue --- Doc/whatsnew/3.15.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 6a0a8fd602f428..e91ced290a1575 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -427,7 +427,7 @@ csv --- * The :meth:`csv.reader` has been optimized, and is around 1.4x faster. - (Contributed by Maurycy Pawłowski-Wieroński in :gh:`XXX`.) + (Contributed by Maurycy Pawłowski-Wieroński in :gh:`138214`.) 
Deprecated From 502a5d44463113f0dcb028117439043b14c44060 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:59:50 +0200 Subject: [PATCH 06/14] a label can only be part of a statement and a declaration is not a statement --- Modules/_csv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index c9c6c8288b836d..a1429f1ce612ef 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -965,6 +965,7 @@ Reader_iternext(PyObject *op) Py_ssize_t pos, linelen, chunk_end, p; PyObject *lineobj; DialectObj *dialect; + Py_UCS4 c; _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), "Reader.__next__"); @@ -1051,7 +1052,7 @@ Reader_iternext(PyObject *op) pos = chunk_end; if (pos < linelen) { - Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + c = PyUnicode_READ_CHAR(lineobj, pos); if (parse_process_char(self, module_state, c) < 0) { Py_DECREF(lineobj); goto err; @@ -1088,7 +1089,7 @@ Reader_iternext(PyObject *op) pos = chunk_end; if (pos < linelen) { - Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + c = PyUnicode_READ_CHAR(lineobj, pos); if (parse_process_char(self, module_state, c) < 0) { Py_DECREF(lineobj); goto err; @@ -1097,7 +1098,7 @@ Reader_iternext(PyObject *op) } break; default: - Py_UCS4 c = PyUnicode_READ_CHAR(lineobj, pos); + c = PyUnicode_READ_CHAR(lineobj, pos); if (parse_process_char(self, module_state, c) < 0) { Py_DECREF(lineobj); goto err; From 50cce09861c317adb44feb8c85c1a12a8ee627b9 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 11:45:06 +0200 Subject: [PATCH 07/14] func --- Doc/whatsnew/3.15.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e91ced290a1575..dbbdb48ced6d4d 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -426,7 +426,7 @@ Optimizations csv --- -* The :meth:`csv.reader` has been optimized, and 
is around 1.4x faster. +* The :func:`csv.reader` has been optimized, and is around 1.4x faster. (Contributed by Maurycy Pawłowski-Wieroński in :gh:`138214`.) From 41c11dfb9a93836b17170d860b668af88c4baabe Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 11:54:45 +0200 Subject: [PATCH 08/14] macros --- Modules/_csv.c | 95 +++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 60 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index a1429f1ce612ef..3b986640132d53 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -967,6 +967,30 @@ Reader_iternext(PyObject *op) DialectObj *dialect; Py_UCS4 c; +#define FIND_AND_UPDATE_CHUNK_END(c) \ + do \ + { \ + p = PyUnicode_FindChar(lineobj, (c), pos, linelen, 1); \ + if (p == -2) { \ + Py_DECREF(lineobj); \ + goto err; \ + } \ + if (p >= 0 && p < chunk_end) { \ + chunk_end = p; \ + } \ + } while (0) + +#define PROCESS_CHAR_AND_ADVANCE() \ + do \ + { \ + c = PyUnicode_READ_CHAR(lineobj, pos); \ + if (parse_process_char(self, module_state, c) < 0) { \ + Py_DECREF(lineobj); \ + goto err; \ + } \ + pos++; \ + } while (0) + _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), "Reader.__next__"); if (module_state == NULL) { @@ -1012,36 +1036,12 @@ Reader_iternext(PyObject *op) case IN_FIELD: chunk_end = linelen; - p = PyUnicode_FindChar(lineobj, dialect->delimiter, pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; - } + FIND_AND_UPDATE_CHUNK_END(dialect->delimiter); if (dialect->escapechar != NOT_SET) { - p = PyUnicode_FindChar(lineobj, dialect->escapechar, pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; - } - } - p = PyUnicode_FindChar(lineobj, '\n', pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; - } - p = 
PyUnicode_FindChar(lineobj, '\r', pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; + FIND_AND_UPDATE_CHUNK_END(dialect->escapechar); } + FIND_AND_UPDATE_CHUNK_END('\n'); + FIND_AND_UPDATE_CHUNK_END('\r'); if (chunk_end > pos) { if (parse_add_substring(self, module_state, lineobj, pos, chunk_end) < 0) { @@ -1052,32 +1052,15 @@ Reader_iternext(PyObject *op) pos = chunk_end; if (pos < linelen) { - c = PyUnicode_READ_CHAR(lineobj, pos); - if (parse_process_char(self, module_state, c) < 0) { - Py_DECREF(lineobj); - goto err; - } - pos++; + PROCESS_CHAR_AND_ADVANCE(); } break; case IN_QUOTED_FIELD: chunk_end = linelen; - p = PyUnicode_FindChar(lineobj, dialect->quotechar, pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; - } + FIND_AND_UPDATE_CHUNK_END(dialect->quotechar); if (dialect->escapechar != NOT_SET) { - p = PyUnicode_FindChar(lineobj, dialect->escapechar, pos, linelen, 1); - if (p >= 0 && p < chunk_end) { - chunk_end = p; - } else if (p == -2) { - Py_DECREF(lineobj); - goto err; - } + FIND_AND_UPDATE_CHUNK_END(dialect->escapechar); } if (chunk_end > pos) { @@ -1089,21 +1072,11 @@ Reader_iternext(PyObject *op) pos = chunk_end; if (pos < linelen) { - c = PyUnicode_READ_CHAR(lineobj, pos); - if (parse_process_char(self, module_state, c) < 0) { - Py_DECREF(lineobj); - goto err; - } - pos++; + PROCESS_CHAR_AND_ADVANCE(); } break; default: - c = PyUnicode_READ_CHAR(lineobj, pos); - if (parse_process_char(self, module_state, c) < 0) { - Py_DECREF(lineobj); - goto err; - } - pos++; + PROCESS_CHAR_AND_ADVANCE(); break; } } @@ -1116,6 +1089,8 @@ Reader_iternext(PyObject *op) self->fields = NULL; err: return fields; +#undef PROCESS_CHAR_AND_ADVANCE +#undef FIND_AND_UPDATE_CHUNK_END } static void From acbfaa0f12be6f4f858495612527bb735df584c4 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> 
Date: Thu, 28 Aug 2025 11:58:54 +0200 Subject: [PATCH 09/14] comment --- Modules/_csv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_csv.c b/Modules/_csv.c index 3b986640132d53..69232d0e75e98e 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1032,6 +1032,10 @@ Reader_iternext(PyObject *op) pos = 0; while (pos < linelen) { + /* For IN_FIELD and IN_QUOTED_FIELD states, optimize by finding + * chunks of characters that can be processed together up to the + * next special character (eg: delimiter, quote, escape). + */ switch (self->state) { case IN_FIELD: chunk_end = linelen; From 6b9088781efc4286250bc45375b4f5891e3b2ce3 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 12:30:43 +0200 Subject: [PATCH 10/14] reduce the diff --- Modules/_csv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 69232d0e75e98e..e57fd465369b25 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1027,9 +1027,8 @@ Reader_iternext(PyObject *op) return NULL; } ++self->line_num; - - linelen = PyUnicode_GET_LENGTH(lineobj); pos = 0; + linelen = PyUnicode_GET_LENGTH(lineobj); while (pos < linelen) { /* For IN_FIELD and IN_QUOTED_FIELD states, optimize by finding From f2d56daf9a887ba08bc1b139b4018a2a2779e830 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 12:40:51 +0200 Subject: [PATCH 11/14] do not be clever --- Modules/_csv.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index e57fd465369b25..0b7c7e37e7e39c 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -964,7 +964,6 @@ Reader_iternext(PyObject *op) PyObject *fields = NULL; Py_ssize_t pos, linelen, chunk_end, p; PyObject *lineobj; - DialectObj *dialect; Py_UCS4 c; #define FIND_AND_UPDATE_CHUNK_END(c) \ @@ -1000,15 +999,13 @@ Reader_iternext(PyObject *op) if (parse_reset(self) < 0) return 
NULL; - dialect = self->dialect; - do { lineobj = PyIter_Next(self->input_iter); if (lineobj == NULL) { /* End of input OR exception */ if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { - if (dialect->strict) + if (self->dialect->strict) PyErr_SetString(module_state->error_obj, "unexpected end of data"); else if (parse_save_field(self) >= 0) @@ -1039,9 +1036,9 @@ Reader_iternext(PyObject *op) case IN_FIELD: chunk_end = linelen; - FIND_AND_UPDATE_CHUNK_END(dialect->delimiter); - if (dialect->escapechar != NOT_SET) { - FIND_AND_UPDATE_CHUNK_END(dialect->escapechar); + FIND_AND_UPDATE_CHUNK_END(self->dialect->delimiter); + if (self->dialect->escapechar != NOT_SET) { + FIND_AND_UPDATE_CHUNK_END(self->dialect->escapechar); } FIND_AND_UPDATE_CHUNK_END('\n'); FIND_AND_UPDATE_CHUNK_END('\r'); @@ -1061,9 +1058,9 @@ Reader_iternext(PyObject *op) case IN_QUOTED_FIELD: chunk_end = linelen; - FIND_AND_UPDATE_CHUNK_END(dialect->quotechar); - if (dialect->escapechar != NOT_SET) { - FIND_AND_UPDATE_CHUNK_END(dialect->escapechar); + FIND_AND_UPDATE_CHUNK_END(self->dialect->quotechar); + if (self->dialect->escapechar != NOT_SET) { + FIND_AND_UPDATE_CHUNK_END(self->dialect->escapechar); } if (chunk_end > pos) { From b4b8dbc67fb629438446ad961b5425cf81f8c053 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 12:42:23 +0200 Subject: [PATCH 12/14] keep reducing the diff --- Modules/_csv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 0b7c7e37e7e39c..9505a3ea314663 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -962,9 +962,9 @@ Reader_iternext(PyObject *op) ReaderObj *self = _ReaderObj_CAST(op); PyObject *fields = NULL; + Py_UCS4 c; Py_ssize_t pos, linelen, chunk_end, p; PyObject *lineobj; - Py_UCS4 c; #define FIND_AND_UPDATE_CHUNK_END(c) \ do \ From 4d903d723fdd49670f6c26bdf11f1d92f07713c0 Mon Sep 17 00:00:00 2001 From: maurycy 
<5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:34:23 +0200 Subject: [PATCH 13/14] docs --- Doc/whatsnew/3.15.rst | 2 +- .../next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index dbbdb48ced6d4d..333f9b1f6d20e0 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -426,7 +426,7 @@ Optimizations csv --- -* The :func:`csv.reader` has been optimized, and is around 1.4x faster. +* The :func:`csv.reader` has been optimized, and is around 2x faster. (Contributed by Maurycy Pawłowski-Wieroński in :gh:`138214`.) diff --git a/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst b/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst index 3c9ed740a9707e..e2b226b90ae447 100644 --- a/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst +++ b/Misc/NEWS.d/next/Library/2025-08-28-02-41-14.gh-issue-138213.8m2OO9.rst @@ -1 +1 @@ -Speed up :class:`~csv.reader` by 1.4x. +Speed up :func:`~csv.reader` by 2x. From 7ac8785772998aff65f19fdc1485bdd6b7f3ca68 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:34:49 +0200 Subject: [PATCH 14/14] reduce the diff --- Modules/_csv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 9505a3ea314663..e21d2eeab83fde 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -998,7 +998,6 @@ Reader_iternext(PyObject *op) if (parse_reset(self) < 0) return NULL; - do { lineobj = PyIter_Next(self->input_iter); if (lineobj == NULL) {