From ce7c135c4df91b2e3b19f18da66e493b61277c9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:50:59 +0200
Subject: [PATCH 01/20] add tests for C API `codecs`
---
 Modules/_testcapi/codec.c | 237 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 232 insertions(+), 5 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index d13f51e20331a1..8fa3b57194328f 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,17 +1,244 @@
 #include "parts.h"
-#include "util.h"
 
+// === Codecs registration and un-registration ================================
+
+static PyObject *
+codec_register(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+    if (PyCodec_Register(search_function) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+    if (PyCodec_Unregister(search_function) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;   // should not be NULL
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
+}
+
+// === Codecs encoding and decoding interfaces ================================
+
+static PyObject *
+codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *input;
+    const char *encoding;   // should not be NULL
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    return PyCodec_Encode(input, encoding, errors);
+}
+
+static PyObject *
+codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *input;
+    const char *encoding;   // should not be NULL
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    return PyCodec_Decode(input, encoding, errors);
+}
+
+static PyObject *
+codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;  // should not be NULL
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    return PyCodec_Encoder(encoding);
+}
+
+static PyObject *
+codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;  // should not be NULL
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    return PyCodec_Decoder(encoding);
+}
+
+static PyObject *
+codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;   // should not be NULL
+    const char *errors;     // should not be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    assert(errors != NULL);
+    return PyCodec_IncrementalEncoder(encoding, errors);
+}
+
+static PyObject *
+codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;   // should not be NULL
+    const char *errors;     // should not be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    assert(errors != NULL);
+    return PyCodec_IncrementalDecoder(encoding, errors);
+}
+
+static PyObject *
+codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;  // should not be NULL
+    PyObject *stream;
+    const char *errors;    // should not be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    assert(errors != NULL);
+    return PyCodec_StreamReader(encoding, stream, errors);
+}
+
+static PyObject *
+codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;  // should not be NULL
+    PyObject *stream;
+    const char *errors;    // should not be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    assert(errors != NULL);
+    return PyCodec_StreamWriter(encoding, stream, errors);
+}
+
+// === Codecs errors handlers =================================================
+
+static PyObject *
+codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;  // should not be NULL
+    PyObject *error;
+    if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) {
+        return NULL;
+    }
+    assert(encoding != NULL);
+    if (PyCodec_RegisterError(encoding, error) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    const char *encoding;   // can be NULL
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+        return NULL;
+    }
+    return PyCodec_LookupError(encoding);
+}
+
+static PyObject *
+codec_strict_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_StrictErrors(exc);
+}
+
+static PyObject *
+codec_ignore_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_IgnoreErrors(exc);
+}
+
+static PyObject *
+codec_replace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_ReplaceErrors(exc);
+}
+
+static PyObject *
+codec_xmlcharrefreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_XMLCharRefReplaceErrors(exc);
+}
+
+static PyObject *
+codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_BackslashReplaceErrors(exc);
+}
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+static PyObject *
+codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_NameReplaceErrors(exc);
+}
+#endif
 
 static PyMethodDef test_methods[] = {
-    {NULL},
+    /* codecs registration */
+    {"codec_register", codec_register, METH_O},
+    {"codec_unregister", codec_unregister, METH_O},
+    {"codec_known_encoding", codec_known_encoding, METH_VARARGS},
+    /* encoding and decoding interface */
+    {"codec_encode", codec_encode, METH_VARARGS},
+    {"codec_decode", codec_decode, METH_VARARGS},
+    {"codec_encoder", codec_encoder, METH_VARARGS},
+    {"codec_decoder", codec_decoder, METH_VARARGS},
+    {"codec_incremental_encoder", codec_incremental_encoder, METH_VARARGS},
+    {"codec_incremental_decoder", codec_incremental_decoder, METH_VARARGS},
+    {"codec_stream_reader", codec_stream_reader, METH_VARARGS},
+    {"codec_stream_writer", codec_stream_writer, METH_VARARGS},
+    /* error handling */
+    {"codec_register_error", codec_register_error, METH_VARARGS},
+    {"codec_lookup_error", codec_lookup_error, METH_VARARGS},
+    {"codec_strict_errors", codec_strict_errors, METH_O},
+    {"codec_ignore_errors", codec_ignore_errors, METH_O},
+    {"codec_replace_errors", codec_replace_errors, METH_O},
+    {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O},
+    {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O},
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+    {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
+#endif
+    {NULL, NULL, 0, NULL},
 };
 
 int
-_PyTestCapi_Init_Codec(PyObject *m)
+_PyTestCapi_Init_Codec(PyObject *module)
 {
-    if (PyModule_AddFunctions(m, test_methods) < 0){
+    if (PyModule_AddFunctions(module, test_methods) < 0) {
         return -1;
     }
-
     return 0;
 }
From f9e350a18c7d1d2f41b596a0ff73f4454b9bd1cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:51:10 +0200
Subject: [PATCH 02/20] add Python tests for `_codecs`
---
 Lib/test/test_capi/test_codecs.py | 232 +++++++++++++++++++++++++++++-
 1 file changed, 230 insertions(+), 2 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index bd521a509d07ec..f3e96cadf67066 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,5 +1,10 @@
-import unittest
+import codecs
+import contextlib
+import io
 import sys
+import unittest
+import unittest.mock as mock
+import _testcapi
 from test.support import import_helper
 
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
@@ -7,7 +12,7 @@
 NULL = None
 
 
-class CAPITest(unittest.TestCase):
+class CAPIUnicodeTest(unittest.TestCase):
     # TODO: Test the following functions:
     #
     #   PyUnicode_BuildEncodingMap
@@ -516,5 +521,228 @@ def test_asrawunicodeescapestring(self):
         # CRASHES asrawunicodeescapestring(NULL)
 
 
+class CAPICodecRegistration(unittest.TestCase):
+
+    def setUp(self):
+        self.enterContext(import_helper.isolated_modules())
+        self.enterContext(import_helper.CleanImport('codecs'))
+        self.codecs = import_helper.import_module('codecs')
+        # Encoding names are normalized internally by converting them
+        # to lowercase and their hyphens are replaced by underscores.
+        self.encoding_name = f'codec_reversed_{id(self)}'
+        # make sure that our custom codec is not already registered
+        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+        # create the search function without registering yet
+        self._create_custom_codec()
+
+    def _create_custom_codec(self):
+        def codec_encoder(m, errors='strict'):
+            return (type(m)().join(reversed(m)), len(m))
+
+        def codec_decoder(c, errors='strict'):
+            return (type(c)().join(reversed(c)), len(c))
+
+        class IncrementalEncoder(codecs.IncrementalEncoder):
+            def encode(self, input, final=False):
+                return codec_encoder(input)
+
+        class IncrementalDecoder(codecs.IncrementalDecoder):
+            def decode(self, input, final=False):
+                return codec_decoder(input)
+
+        class StreamReader(codecs.StreamReader):
+            def encode(self, input, errors='strict'):
+                return codec_encoder(input, errors=errors)
+
+            def decode(self, input, errors='strict'):
+                return codec_decoder(input, errors=errors)
+
+        class StreamWriter(codecs.StreamWriter):
+            def encode(self, input, errors='strict'):
+                return codec_encoder(input, errors=errors)
+
+            def decode(self, input, errors='strict'):
+                return codec_decoder(input, errors=errors)
+
+        info = codecs.CodecInfo(
+            encode=codec_encoder,
+            decode=codec_decoder,
+            streamreader=StreamReader,
+            streamwriter=StreamWriter,
+            incrementalencoder=IncrementalEncoder,
+            incrementaldecoder=IncrementalDecoder,
+            name=self.encoding_name
+        )
+
+        def search_function(encoding):
+            if encoding == self.encoding_name:
+                return info
+            return None
+
+        self.codec_info = info
+        self.search_function = search_function
+
+    @contextlib.contextmanager
+    def use_custom_encoder(self):
+        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+        self.codecs.register(self.search_function)
+        yield
+        self.codecs.unregister(self.search_function)
+        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+
+    def test_codec_register(self):
+        search_function, encoding = self.search_function, self.encoding_name
+        self.assertIsNone(_testcapi.codec_register(search_function))
+        self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
+        self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+
+    def test_codec_unregister(self):
+        search_function, encoding = self.search_function, self.encoding_name
+        self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        self.codecs.register(search_function)
+        self.assertIsNone(_testcapi.codec_unregister(search_function))
+        self.assertRaises(LookupError, self.codecs.lookup, encoding)
+
+    def test_codec_known_encoding(self):
+        self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec')
+        self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
+        self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
+        self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
+
+        encoding_name = self.encoding_name
+        self.assertRaises(LookupError, self.codecs.lookup, encoding_name)
+        self.codecs.register(self.search_function)
+
+        for name in [
+            encoding_name,
+            encoding_name.upper(),
+            encoding_name.replace('_', '-'),
+        ]:
+            with self.subTest(name):
+                self.assertTrue(_testcapi.codec_known_encoding(name))
+
+    def test_codec_encode(self):
+        encode = _testcapi.codec_encode
+        self.assertEqual(encode('a', 'utf-8', NULL), b'a')
+        self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
+        self.assertEqual(encode('é', 'ascii', 'ignore'), b'')
+        # todo: add more cases
+        self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
+        # CRASHES encode('a', NULL, 'strict')
+
+    def test_codec_decode(self):
+        decode = _testcapi.codec_decode
+
+        b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
+        s = 'a\xa1\u4f60\U0001f600'
+
+        self.assertEqual(decode(b, 'utf-8', 'strict'), s)
+        self.assertEqual(decode(b, 'utf-8', NULL), s)
+        self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
+        self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
+        self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
+        self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
+        # todo: add more cases
+
+        # _codecs.decode only reports unknown errors policy when they are
+        # used (it has a fast path for empty bytes); this is different from
+        # PyUnicode_Decode which checks that both the encoding and the errors
+        # policy are recognized.
+        self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
+
+        self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
+        # CRASHES decode(b, NULL, 'strict')
+
+    def test_codec_encoder(self):
+        with self.use_custom_encoder():
+            encoder = _testcapi.codec_encoder(self.encoding_name)
+            self.assertIs(encoder, self.codec_info.encode)
+
+    def test_codec_decoder(self):
+        with self.use_custom_encoder():
+            decoder = _testcapi.codec_decoder(self.encoding_name)
+            self.assertIs(decoder, self.codec_info.decode)
+
+    def test_codec_incremental_encoder(self):
+        with self.use_custom_encoder():
+            encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict')
+            self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+    def test_codec_incremental_decoder(self):
+        with self.use_custom_encoder():
+            decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict')
+            self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+    def test_codec_stream_reader(self):
+        with self.use_custom_encoder():
+            encoding, stream = self.encoding_name, io.StringIO()
+            reader = _testcapi.codec_stream_reader(encoding, stream, 'strict')
+            self.assertIsInstance(reader, self.codec_info.streamreader)
+
+    def test_codec_stream_writer(self):
+        with self.use_custom_encoder():
+            encoding, stream = self.encoding_name, io.StringIO()
+            writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
+            self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+class CAPICodecErrors(unittest.TestCase):
+
+    def setUp(self):
+        self.enterContext(import_helper.isolated_modules())
+        self.enterContext(import_helper.CleanImport('codecs'))
+        self.codecs = import_helper.import_module('codecs')
+
+    def test_codec_register_error(self):
+        self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+
+        def error_handler(exc):
+            raise exc
+
+        error_handler = mock.Mock(wraps=error_handler)
+        _testcapi.codec_register_error('custom', error_handler)
+
+        self.assertRaises(UnicodeEncodeError, self.codecs.encode,
+                          '\xff', 'ascii', errors='custom')
+        error_handler.assert_called_once()
+        error_handler.reset_mock()
+
+        self.assertRaises(UnicodeDecodeError, self.codecs.decode,
+                          b'\xff', 'ascii', errors='custom')
+        error_handler.assert_called_once()
+
+    def test_codec_lookup_error(self):
+        codec_lookup_error = _testcapi.codec_lookup_error
+        self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors)
+        self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors)
+        self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors)
+        self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors)
+        self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors)
+        self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors)
+        self.assertRaises(LookupError, codec_lookup_error, 'custom')
+
+    def test_codec_error_handlers(self):
+        exceptions = [
+            UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+            UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
+            UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
+            UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
+        ]
+
+        strict_handler = _testcapi.codec_strict_errors
+        for exc in exceptions:
+            with self.subTest(handler=strict_handler, exc=exc):
+                self.assertRaises(UnicodeEncodeError, strict_handler, exc)
+
+        for handler in [
+            _testcapi.codec_ignore_errors,
+            _testcapi.codec_replace_errors,
+            _testcapi.codec_xmlcharrefreplace_errors,
+            _testcapi.codec_namereplace_errors,
+        ]:
+            for exc in exceptions:
+                with self.subTest(handler=handler, exc=exc):
+                    handler(exc)
+
+
 if __name__ == "__main__":
     unittest.main()
From 15b68116da1f46ebe195ef8e2d02feb129319b1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 17:40:48 +0200
Subject: [PATCH 03/20] fix size bug
---
 Objects/exceptions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index fda62f159c1540..9bcdc88e1291ca 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0; /*XXX check for values <0*/
     if (*start>=size)
-        *start = size-1;
+        *start = size ? size-1 : 0;
     Py_DECREF(obj);
     return 0;
 }
@@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0;
     if (*start>=size)
-        *start = size-1;
+        *start = size ? size-1 : 0;
     Py_DECREF(obj);
     return 0;
 }
From 8048ae1d2172df5bace73a3ea30712745d4c6864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 17:46:04 +0200
Subject: [PATCH 04/20] rename test class
---
 Lib/test/test_capi/test_codecs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index f3e96cadf67066..c7fbca32ac582b 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -521,7 +521,7 @@ def test_asrawunicodeescapestring(self):
         # CRASHES asrawunicodeescapestring(NULL)
 
 
-class CAPICodecRegistration(unittest.TestCase):
+class CAPICodecs(unittest.TestCase):
 
     def setUp(self):
         self.enterContext(import_helper.isolated_modules())
From 8487b4630cb51a07ceb16569446733b1947df05f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 15:04:54 +0200
Subject: [PATCH 05/20] Revert "fix size bug"
This reverts commit 15b68116da1f46ebe195ef8e2d02feb129319b1f.
---
 Objects/exceptions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 9bcdc88e1291ca..fda62f159c1540 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0; /*XXX check for values <0*/
     if (*start>=size)
-        *start = size ? size-1 : 0;
+        *start = size-1;
     Py_DECREF(obj);
     return 0;
 }
@@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0;
     if (*start>=size)
-        *start = size ? size-1 : 0;
+        *start = size-1;
     Py_DECREF(obj);
     return 0;
 }
From 0097f2a4c595590d70a04dc61a34c5da08aea011 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 15:24:09 +0200
Subject: [PATCH 06/20] Disable tests that are known to crash.
---
 Lib/test/test_capi/test_codecs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index c7fbca32ac582b..21cbf76d14dffb 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -722,7 +722,9 @@ def test_codec_lookup_error(self):
 
     def test_codec_error_handlers(self):
         exceptions = [
-            UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+            # A UnicodeError with an empty message currently crashes:
+            # See: https://github.com/python/cpython/issues/123378
+            # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
             UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
             UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
             UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
From 303b13c4c61aa2a6d13fd027c3c31a0b4cef7f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:24:50 +0200
Subject: [PATCH 07/20] address Victor's review
---
 Lib/test/test_capi/test_codecs.py | 15 ++++++++++----
 Modules/_testcapi/codec.c         | 34 +++++++++----------------------
 2 files changed, 21 insertions(+), 28 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 21cbf76d14dffb..2c72a989e32364 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -592,14 +592,21 @@ def use_custom_encoder(self):
 
     def test_codec_register(self):
         search_function, encoding = self.search_function, self.encoding_name
+        # register the search function using the C API
         self.assertIsNone(_testcapi.codec_register(search_function))
         self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
         self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+        # unregister the search function using the regular API
+        self.codecs.unregister(search_function)
+        self.assertRaises(LookupError, self.codecs.lookup, encoding)
 
     def test_codec_unregister(self):
         search_function, encoding = self.search_function, self.encoding_name
         self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        # register the search function using the regular API
         self.codecs.register(search_function)
+        self.assertIsNotNone(self.codecs.lookup(encoding))
+        # unregister the search function using the C API
         self.assertIsNone(_testcapi.codec_unregister(search_function))
         self.assertRaises(LookupError, self.codecs.lookup, encoding)
 
@@ -625,16 +632,16 @@ def test_codec_encode(self):
         encode = _testcapi.codec_encode
         self.assertEqual(encode('a', 'utf-8', NULL), b'a')
         self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
-        self.assertEqual(encode('é', 'ascii', 'ignore'), b'')
-        # todo: add more cases
+        self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
+
         self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
         # CRASHES encode('a', NULL, 'strict')
 
     def test_codec_decode(self):
         decode = _testcapi.codec_decode
 
-        b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
         s = 'a\xa1\u4f60\U0001f600'
+        b = s.encode()
 
         self.assertEqual(decode(b, 'utf-8', 'strict'), s)
         self.assertEqual(decode(b, 'utf-8', NULL), s)
@@ -642,7 +649,6 @@ def test_codec_decode(self):
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
         self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
-        # todo: add more cases
 
         # _codecs.decode only reports unknown errors policy when they are
         # used (it has a fast path for empty bytes); this is different from
@@ -685,6 +691,7 @@ def test_codec_stream_writer(self):
             writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
             self.assertIsInstance(writer, self.codec_info.streamwriter)
 
+
 class CAPICodecErrors(unittest.TestCase):
 
     def setUp(self):
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 8fa3b57194328f..5cd187bde60fef 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -24,10 +24,9 @@ static PyObject *
 codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
 }
 
@@ -39,10 +38,9 @@ codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
     PyObject *input;
     const char *encoding;   // should not be NULL
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Encode(input, encoding, errors);
 }
 
@@ -52,10 +50,9 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
     PyObject *input;
     const char *encoding;   // should not be NULL
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Decode(input, encoding, errors);
 }
 
@@ -63,10 +60,9 @@ static PyObject *
 codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Encoder(encoding);
 }
 
@@ -74,10 +70,9 @@ static PyObject *
 codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "z", &encoding)) {
+    if (!PyArg_ParseTuple(args, "s", &encoding)) {
         return NULL;
     }
-    assert(encoding != NULL);
     return PyCodec_Decoder(encoding);
 }
 
@@ -86,11 +81,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
     const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_IncrementalEncoder(encoding, errors);
 }
 
@@ -99,11 +92,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;   // should not be NULL
     const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_IncrementalDecoder(encoding, errors);
 }
 
@@ -113,11 +104,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
     const char *encoding;  // should not be NULL
     PyObject *stream;
     const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_StreamReader(encoding, stream, errors);
 }
 
@@ -127,11 +116,9 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
     const char *encoding;  // should not be NULL
     PyObject *stream;
     const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
         return NULL;
     }
-    assert(encoding != NULL);
-    assert(errors != NULL);
     return PyCodec_StreamWriter(encoding, stream, errors);
 }
 
@@ -142,10 +129,9 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
 {
     const char *encoding;  // should not be NULL
     PyObject *error;
-    if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) {
+    if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
         return NULL;
     }
-    assert(encoding != NULL);
     if (PyCodec_RegisterError(encoding, error) < 0) {
         return NULL;
     }
From 4f474ddc570b6b127b2880f69955496bd6d83f4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 18:13:07 +0200
Subject: [PATCH 08/20] update tests to reflect user errors
---
 Lib/test/test_capi/test_codecs.py | 79 ++++++++++++++++++++++++-------
 Modules/_testcapi/codec.c         | 54 ++++++++++++---------
 2 files changed, 94 insertions(+), 39 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 2c72a989e32364..c57491619d5976 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,6 +1,7 @@
 import codecs
 import contextlib
 import io
+import re
 import sys
 import unittest
 import unittest.mock as mock
@@ -10,6 +11,7 @@
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
 
 NULL = None
+BAD_ARGUMENT = re.escape('bad argument type for built-in operation')
 
 
 class CAPIUnicodeTest(unittest.TestCase):
@@ -635,7 +637,8 @@ def test_codec_encode(self):
         self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
 
         self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
-        # CRASHES encode('a', NULL, 'strict')
+        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+            encode('a', NULL, 'strict')
 
     def test_codec_decode(self):
         decode = _testcapi.codec_decode
@@ -650,46 +653,90 @@ def test_codec_decode(self):
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
         self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
 
-        # _codecs.decode only reports unknown errors policy when they are
-        # used (it has a fast path for empty bytes); this is different from
-        # PyUnicode_Decode which checks that both the encoding and the errors
-        # policy are recognized.
+        # _codecs.decode() only reports unknown errors policy when they are
+        # used; this is different from PyUnicode_Decode() which checks that
+        # both the encoding and the errors policy are recognized before even
+        # attempting to call the decoder.
         self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
+        self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a')
 
         self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
-        # CRASHES decode(b, NULL, 'strict')
+        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+            decode(b, NULL, 'strict')
 
     def test_codec_encoder(self):
+        codec_encoder = _testcapi.codec_encoder
+
         with self.use_custom_encoder():
-            encoder = _testcapi.codec_encoder(self.encoding_name)
+            encoder = codec_encoder(self.encoding_name)
             self.assertIs(encoder, self.codec_info.encode)
 
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_encoder(NULL)
+
     def test_codec_decoder(self):
+        codec_decoder = _testcapi.codec_decoder
+
         with self.use_custom_encoder():
-            decoder = _testcapi.codec_decoder(self.encoding_name)
+            decoder = codec_decoder(self.encoding_name)
             self.assertIs(decoder, self.codec_info.decode)
 
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_decoder(NULL)
+
     def test_codec_incremental_encoder(self):
+        codec_incremental_encoder = _testcapi.codec_incremental_encoder
+
         with self.use_custom_encoder():
-            encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict')
-            self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+            encoding = self.encoding_name
+
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    encoder = codec_incremental_encoder(encoding, policy)
+                    self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_incremental_encoder(NULL, 'strict')
 
     def test_codec_incremental_decoder(self):
+        codec_incremental_decoder = _testcapi.codec_incremental_decoder
+
         with self.use_custom_encoder():
-            decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict')
-            self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+            encoding = self.encoding_name
+
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    decoder = codec_incremental_decoder(encoding, policy)
+                    self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_incremental_decoder(NULL, 'strict')
 
     def test_codec_stream_reader(self):
+        codec_stream_reader = _testcapi.codec_stream_reader
+
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            reader = _testcapi.codec_stream_reader(encoding, stream, 'strict')
-            self.assertIsInstance(reader, self.codec_info.streamreader)
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    writer = codec_stream_reader(encoding, stream, policy)
+                    self.assertIsInstance(writer, self.codec_info.streamreader)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_stream_reader(NULL, stream, 'strict')
 
     def test_codec_stream_writer(self):
+        codec_stream_writer = _testcapi.codec_stream_writer
+
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
-            self.assertIsInstance(writer, self.codec_info.streamwriter)
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    writer = codec_stream_writer(encoding, stream, policy)
+                    self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_stream_writer(NULL, stream, 'strict')
 
 
 class CAPICodecErrors(unittest.TestCase):
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 5cd187bde60fef..6aa19c2055d6b6 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,5 +1,13 @@
 #include "parts.h"
 
+/*
+ * The Codecs C API assume that 'encoding' is not NULL, lest
+ * it uses PyErr_BadArgument() to set a TypeError exception.
+ *
+ * In this file, we allow to call the functions using None
+ * as NULL to explicitly check this behaviour.
+ */
+
 // === Codecs registration and un-registration ================================
 
 static PyObject *
@@ -23,8 +31,8 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
 static PyObject *
 codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
@@ -36,9 +44,9 @@ static PyObject *
 codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_Encode(input, encoding, errors);
@@ -48,9 +56,9 @@ static PyObject *
 codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_Decode(input, encoding, errors);
@@ -59,8 +67,8 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_Encoder(encoding);
@@ -69,8 +77,8 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_Decoder(encoding);
@@ -79,9 +87,9 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_IncrementalEncoder(encoding, errors);
@@ -90,9 +98,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_IncrementalDecoder(encoding, errors);
@@ -101,10 +109,10 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     PyObject *stream;
-    const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
     return PyCodec_StreamReader(encoding, stream, errors);
@@ -113,10 +121,10 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     PyObject *stream;
-    const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
     return PyCodec_StreamWriter(encoding, stream, errors);
@@ -127,7 +135,7 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // must not be NULL
     PyObject *error;
     if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
         return NULL;
From 87ee0d26d4c150795437c06b547c14464e8b94b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 27 Sep 2024 12:52:35 +0200
Subject: [PATCH 09/20] fix C API codec tests
---
 Lib/test/test_capi/test_codecs.py | 68 ++++++++++++++++---------------
 1 file changed, 35 insertions(+), 33 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index c57491619d5976..4865f4384422ac 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,6 +1,7 @@
 import codecs
 import contextlib
 import io
+import os
 import re
 import sys
 import unittest
@@ -526,14 +527,13 @@ def test_asrawunicodeescapestring(self):
 class CAPICodecs(unittest.TestCase):
 
     def setUp(self):
-        self.enterContext(import_helper.isolated_modules())
-        self.enterContext(import_helper.CleanImport('codecs'))
-        self.codecs = import_helper.import_module('codecs')
         # Encoding names are normalized internally by converting them
         # to lowercase and their hyphens are replaced by underscores.
         self.encoding_name = f'codec_reversed_{id(self)}'
-        # make sure that our custom codec is not already registered
-        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+        # Make sure that our custom codec is not already registered (that
+        # way we know whether we correctly unregistered the custom codec
+        # after a test or not).
+        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
         # create the search function without registering yet
         self._create_custom_codec()
 
@@ -586,41 +586,47 @@ def search_function(encoding):
 
     @contextlib.contextmanager
     def use_custom_encoder(self):
-        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
-        self.codecs.register(self.search_function)
+        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+        codecs.register(self.search_function)
         yield
-        self.codecs.unregister(self.search_function)
-        self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+        codecs.unregister(self.search_function)
+        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
 
     def test_codec_register(self):
         search_function, encoding = self.search_function, self.encoding_name
         # register the search function using the C API
         self.assertIsNone(_testcapi.codec_register(search_function))
-        self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
-        self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+        # in case the test failed before cleaning up
+        self.addCleanup(codecs.unregister, self.search_function)
+        self.assertIs(codecs.lookup(encoding), search_function(encoding))
+        self.assertEqual(codecs.encode('123', encoding=encoding), '321')
         # unregister the search function using the regular API
-        self.codecs.unregister(search_function)
-        self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        codecs.unregister(search_function)
+        self.assertRaises(LookupError, codecs.lookup, encoding)
 
     def test_codec_unregister(self):
         search_function, encoding = self.search_function, self.encoding_name
-        self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        self.assertRaises(LookupError, codecs.lookup, encoding)
         # register the search function using the regular API
-        self.codecs.register(search_function)
-        self.assertIsNotNone(self.codecs.lookup(encoding))
+        codecs.register(search_function)
+        # in case the test failed before cleaning up
+        self.addCleanup(codecs.unregister, self.search_function)
+        self.assertIsNotNone(codecs.lookup(encoding))
         # unregister the search function using the C API
         self.assertIsNone(_testcapi.codec_unregister(search_function))
-        self.assertRaises(LookupError, self.codecs.lookup, encoding)
+        self.assertRaises(LookupError, codecs.lookup, encoding)
 
     def test_codec_known_encoding(self):
-        self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec')
+        self.assertRaises(LookupError, codecs.lookup, 'unknown-codec')
         self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
         self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
         self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
 
         encoding_name = self.encoding_name
-        self.assertRaises(LookupError, self.codecs.lookup, encoding_name)
-        self.codecs.register(self.search_function)
+        self.assertRaises(LookupError, codecs.lookup, encoding_name)
+
+        codecs.register(self.search_function)
+        self.addCleanup(codecs.unregister, self.search_function)
 
         for name in [
             encoding_name,
@@ -741,11 +747,6 @@ def test_codec_stream_writer(self):
 
 class CAPICodecErrors(unittest.TestCase):
 
-    def setUp(self):
-        self.enterContext(import_helper.isolated_modules())
-        self.enterContext(import_helper.CleanImport('codecs'))
-        self.codecs = import_helper.import_module('codecs')
-
     def test_codec_register_error(self):
         self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
 
@@ -754,24 +755,25 @@ def error_handler(exc):
 
         error_handler = mock.Mock(wraps=error_handler)
         _testcapi.codec_register_error('custom', error_handler)
+        # self.addCleanup(codecs.unregister_error, 'custom')
 
-        self.assertRaises(UnicodeEncodeError, self.codecs.encode,
+        self.assertRaises(UnicodeEncodeError, codecs.encode,
                           '\xff', 'ascii', errors='custom')
         error_handler.assert_called_once()
         error_handler.reset_mock()
 
-        self.assertRaises(UnicodeDecodeError, self.codecs.decode,
+        self.assertRaises(UnicodeDecodeError, codecs.decode,
                           b'\xff', 'ascii', errors='custom')
         error_handler.assert_called_once()
 
     def test_codec_lookup_error(self):
         codec_lookup_error = _testcapi.codec_lookup_error
-        self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors)
-        self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors)
-        self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors)
-        self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors)
-        self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors)
-        self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors)
+        self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
+        self.assertIs(codec_lookup_error('strict'), codecs.strict_errors)
+        self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
+        self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
+        self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+        self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
         self.assertRaises(LookupError, codec_lookup_error, 'custom')
 
     def test_codec_error_handlers(self):
From 6a36eb08466e2a610dbbc2621c5c81482fd847cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 27 Sep 2024 13:30:12 +0200
Subject: [PATCH 10/20] small hack to make the test suite correct
---
 Lib/test/test_capi/test_codecs.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 4865f4384422ac..60b48ae099dcd3 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -7,6 +7,7 @@
 import unittest
 import unittest.mock as mock
 import _testcapi
+from getopt import error
 from test.support import import_helper
 
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
@@ -748,14 +749,19 @@ def test_codec_stream_writer(self):
 class CAPICodecErrors(unittest.TestCase):
 
     def test_codec_register_error(self):
-        self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+        try:
+            error_handler = _testcapi.codec_lookup_error('custom')
+        except LookupError:
+            error_handler = None
 
-        def error_handler(exc):
-            raise exc
+        if error_handler is None:
+            def custom_error_handler(exc):
+                raise exc
 
-        error_handler = mock.Mock(wraps=error_handler)
-        _testcapi.codec_register_error('custom', error_handler)
-        # self.addCleanup(codecs.unregister_error, 'custom')
+            error_handler = mock.Mock(wraps=custom_error_handler)
+            _testcapi.codec_register_error('custom', error_handler)
+        else:
+            self.assertIsInstance(error_handler, mock.Mock)
 
         self.assertRaises(UnicodeEncodeError, codecs.encode,
                           '\xff', 'ascii', errors='custom')
@@ -765,6 +771,7 @@ def error_handler(exc):
         self.assertRaises(UnicodeDecodeError, codecs.decode,
                           b'\xff', 'ascii', errors='custom')
         error_handler.assert_called_once()
+        error_handler.reset_mock()
 
     def test_codec_lookup_error(self):
         codec_lookup_error = _testcapi.codec_lookup_error
@@ -774,7 +781,7 @@ def test_codec_lookup_error(self):
         self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
         self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
         self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
-        self.assertRaises(LookupError, codec_lookup_error, 'custom')
+        self.assertRaises(LookupError, codec_lookup_error, 'unknown')
 
     def test_codec_error_handlers(self):
         exceptions = [
From 145b285f6f6200eda8fa90155a2abb4846e2a769 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 28 Sep 2024 18:29:06 +0200
Subject: [PATCH 11/20] remove un-necessary imports
---
 Lib/test/test_capi/test_codecs.py | 2 --
 1 file changed, 2 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 60b48ae099dcd3..29642203b44878 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,13 +1,11 @@
 import codecs
 import contextlib
 import io
-import os
 import re
 import sys
 import unittest
 import unittest.mock as mock
 import _testcapi
-from getopt import error
 from test.support import import_helper
 
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
From 7be1f555f9f9483525d79a7596c4c30cd9e3a6a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 09:58:54 +0200
Subject: [PATCH 12/20] use `_codecs._unregister_error` to cleanup test state
---
 Lib/test/test_capi/test_codecs.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 29642203b44878..d9e2524a63fa88 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -747,19 +747,17 @@ def test_codec_stream_writer(self):
 class CAPICodecErrors(unittest.TestCase):
 
     def test_codec_register_error(self):
-        try:
-            error_handler = _testcapi.codec_lookup_error('custom')
-        except LookupError:
-            error_handler = None
+        # for cleaning up between tests
+        from _codecs import _unregister_error as _codecs_unregister_error
 
-        if error_handler is None:
-            def custom_error_handler(exc):
-                raise exc
+        self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
 
-            error_handler = mock.Mock(wraps=custom_error_handler)
-            _testcapi.codec_register_error('custom', error_handler)
-        else:
-            self.assertIsInstance(error_handler, mock.Mock)
+        def custom_error_handler(exc):
+            raise exc
+
+        error_handler = mock.Mock(wraps=custom_error_handler)
+        _testcapi.codec_register_error('custom', error_handler)
+        self.addCleanup(_codecs_unregister_error, 'custom')
 
         self.assertRaises(UnicodeEncodeError, codecs.encode,
                           '\xff', 'ascii', errors='custom')
@@ -769,7 +767,6 @@ def custom_error_handler(exc):
         self.assertRaises(UnicodeDecodeError, codecs.decode,
                           b'\xff', 'ascii', errors='custom')
         error_handler.assert_called_once()
-        error_handler.reset_mock()
 
     def test_codec_lookup_error(self):
         codec_lookup_error = _testcapi.codec_lookup_error
From f72be5c600824765297441319b8faa523036143a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:08:28 +0200
Subject: [PATCH 13/20] indicate some semantics for NULL case being tested
---
 Modules/_testcapi/codec.c | 39 ++++++++++++++++-----------------------
 Modules/_testcapi/util.h  | 10 ++++++++++
 2 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 6aa19c2055d6b6..2e4712bd55316f 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,12 +1,5 @@
 #include "parts.h"
-
-/*
- * The Codecs C API assume that 'encoding' is not NULL, lest
- * it uses PyErr_BadArgument() to set a TypeError exception.
- *
- * In this file, we allow to call the functions using None
- * as NULL to explicitly check this behaviour.
- */
+#include "util.h"
 
 // === Codecs registration and un-registration ================================
 
@@ -44,8 +37,8 @@ static PyObject *
 codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL (see top-file comment)
-    const char *errors;     // can be NULL
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
@@ -56,8 +49,8 @@ static PyObject *
 codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL (see top-file comment)
-    const char *errors;     // can be NULL
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
@@ -67,7 +60,7 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
@@ -77,7 +70,7 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
@@ -87,8 +80,8 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
-    const char *errors;     // can be NULL
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
@@ -98,8 +91,8 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
-    const char *errors;     // can be NULL
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
@@ -109,9 +102,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     PyObject *stream;
-    const char *errors;     // can be NULL
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
@@ -121,9 +114,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     PyObject *stream;
-    const char *errors;     // can be NULL
+    const char *errors;                     // can be NULL
     if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
@@ -149,7 +142,7 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // can be NULL
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
diff --git a/Modules/_testcapi/util.h b/Modules/_testcapi/util.h
index f26d7656a10138..042e522542eddb 100644
--- a/Modules/_testcapi/util.h
+++ b/Modules/_testcapi/util.h
@@ -31,3 +31,13 @@ static const char uninitialized[] = "uninitialized";
 #define UNINITIALIZED_SIZE ((Py_ssize_t)236892191)
 /* Marker to check that integer value was set. */
 #define UNINITIALIZED_INT (63256717)
+/*
+ * Marker to indicate that a NULL parameter would not be allowed
+ * at runtime but that the test interface will check that it is
+ * indeed the case.
+ *
+ * Use this macro only if passing NULL to the C API would raise
+ * a catchable exception (and not a fatal exception that would
+ * crash the interpreter).
+ */
+ #define NULL_WOULD_RAISE(NAME)    NAME
From 4d02c6ceedc344e538ebbf948838c22adad2abdc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:08:36 +0200
Subject: [PATCH 14/20] revert a cosmetic change
---
 Modules/_testcapi/codec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 2e4712bd55316f..2bcfcd64d01794 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -222,9 +222,9 @@ static PyMethodDef test_methods[] = {
 };
 
 int
-_PyTestCapi_Init_Codec(PyObject *module)
+_PyTestCapi_Init_Codec(PyObject *m)
 {
-    if (PyModule_AddFunctions(module, test_methods) < 0) {
+    if (PyModule_AddFunctions(m, test_methods) < 0) {
         return -1;
     }
     return 0;
From 0f26ca7ee4179d4688bbe64f55348a1fef1607b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:23:36 +0200
Subject: [PATCH 15/20] Move `PyCodec_NameReplaceErrors` test to the
 `_testlimitedcapi` module
---
 Lib/test/test_capi/test_codecs.py |  4 ++--
 Modules/Setup.stdlib.in           |  2 +-
 Modules/_testcapi/codec.c         | 13 +------------
 Modules/_testlimitedcapi.c        |  3 +++
 Modules/_testlimitedcapi/codec.c  | 29 +++++++++++++++++++++++++++++
 Modules/_testlimitedcapi/parts.h  |  1 +
 6 files changed, 37 insertions(+), 15 deletions(-)
 create mode 100644 Modules/_testlimitedcapi/codec.c
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index d9e2524a63fa88..91d36b65afba46 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -797,11 +797,11 @@ def test_codec_error_handlers(self):
             _testcapi.codec_ignore_errors,
             _testcapi.codec_replace_errors,
             _testcapi.codec_xmlcharrefreplace_errors,
-            _testcapi.codec_namereplace_errors,
+            _testlimitedcapi.codec_namereplace_errors,
         ]:
             for exc in exceptions:
                 with self.subTest(handler=handler, exc=exc):
-                    handler(exc)
+                    self.assertIsInstance(handler(exc), tuple)
 
 
 if __name__ == "__main__":
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 9aa398a80efa1b..52c0f883d383db 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -163,7 +163,7 @@
 @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
 @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
 @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/run.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c _testcapi/object.c _testcapi/monitoring.c  _testcapi/config.c
-@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
+@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/codec.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
 @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
 @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
 
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 2bcfcd64d01794..ba614055915235 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -184,15 +184,6 @@ codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
     return PyCodec_BackslashReplaceErrors(exc);
 }
 
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
-static PyObject *
-codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
-{
-    assert(exc != NULL);
-    return PyCodec_NameReplaceErrors(exc);
-}
-#endif
-
 static PyMethodDef test_methods[] = {
     /* codecs registration */
     {"codec_register", codec_register, METH_O},
@@ -215,9 +206,7 @@ static PyMethodDef test_methods[] = {
     {"codec_replace_errors", codec_replace_errors, METH_O},
     {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O},
     {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O},
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
-    {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
-#endif
+    // PyCodec_NameReplaceErrors() is tested in _testlimitedcapi/codec.c
     {NULL, NULL, 0, NULL},
 };
 
diff --git a/Modules/_testlimitedcapi.c b/Modules/_testlimitedcapi.c
index e74cbfe19871bf..ba83a23117b2a5 100644
--- a/Modules/_testlimitedcapi.c
+++ b/Modules/_testlimitedcapi.c
@@ -38,6 +38,9 @@ PyInit__testlimitedcapi(void)
     if (_PyTestLimitedCAPI_Init_Bytes(mod) < 0) {
         return NULL;
     }
+    if (_PyTestLimitedCAPI_Init_Codec(mod) < 0) {
+        return NULL;
+    }
     if (_PyTestLimitedCAPI_Init_Complex(mod) < 0) {
         return NULL;
     }
diff --git a/Modules/_testlimitedcapi/codec.c b/Modules/_testlimitedcapi/codec.c
new file mode 100644
index 00000000000000..fdc18eedc2d288
--- /dev/null
+++ b/Modules/_testlimitedcapi/codec.c
@@ -0,0 +1,29 @@
+#include "pyconfig.h"   // Py_GIL_DISABLED
+
+// Need limited C API version 3.5 for PyCodec_NameReplaceErrors()
+#if !defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API)
+#  define Py_LIMITED_API 0x03050000
+#endif
+
+#include "parts.h"
+
+static PyObject *
+codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+    assert(exc != NULL);
+    return PyCodec_NameReplaceErrors(exc);
+}
+
+static PyMethodDef test_methods[] = {
+    {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
+    {NULL},
+};
+
+int
+_PyTestLimitedCAPI_Init_Codec(PyObject *module)
+{
+    if (PyModule_AddFunctions(module, test_methods) < 0) {
+        return -1;
+    }
+    return 0;
+}
diff --git a/Modules/_testlimitedcapi/parts.h b/Modules/_testlimitedcapi/parts.h
index 12b890853803f4..4107b150c5b4e0 100644
--- a/Modules/_testlimitedcapi/parts.h
+++ b/Modules/_testlimitedcapi/parts.h
@@ -25,6 +25,7 @@
 int _PyTestLimitedCAPI_Init_Abstract(PyObject *module);
 int _PyTestLimitedCAPI_Init_ByteArray(PyObject *module);
 int _PyTestLimitedCAPI_Init_Bytes(PyObject *module);
+int _PyTestLimitedCAPI_Init_Codec(PyObject *module);
 int _PyTestLimitedCAPI_Init_Complex(PyObject *module);
 int _PyTestLimitedCAPI_Init_Dict(PyObject *module);
 int _PyTestLimitedCAPI_Init_Eval(PyObject *module);
From 1399779a3d9aee5ebf548bbcd040f8d5f70e1ac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:32:52 +0200
Subject: [PATCH 16/20] add comment for why we do not test
 `_PyCodec_UnregisterError`
---
 Lib/test/test_capi/test_codecs.py | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 91d36b65afba46..be64871c67d2ba 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -768,6 +768,10 @@ def custom_error_handler(exc):
                           b'\xff', 'ascii', errors='custom')
         error_handler.assert_called_once()
 
+    # _codecs._unregister_error directly delegates to the internal C
+    # function so a Python-level function test is sufficient (it is
+    # tested in test_codeccallbacks).
+
     def test_codec_lookup_error(self):
         codec_lookup_error = _testcapi.codec_lookup_error
         self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
From 914151e1a9206fbe39ba341506d9fbe819335a06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:34:22 +0200
Subject: [PATCH 17/20] update a comment
---
 Modules/_testcapi/codec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index ba614055915235..dee093d35ea070 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -24,7 +24,7 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
 static PyObject *
 codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
     if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
From 8dd7e8d5e8767de4a33b82fe105fc318b85fe079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:35:49 +0200
Subject: [PATCH 18/20] revert one cosmetic change
---
 Modules/_testcapi/codec.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index dee093d35ea070..e27e64e066c458 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -216,5 +216,6 @@ _PyTestCapi_Init_Codec(PyObject *m)
     if (PyModule_AddFunctions(m, test_methods) < 0) {
         return -1;
     }
+
     return 0;
 }
From 1e6a5ce9f32ea34a9bc515f2c2afcb138b89d912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:52:28 +0200
Subject: [PATCH 19/20] Fix Windows compilation
---
 PCbuild/_testlimitedcapi.vcxproj         | 1 +
 PCbuild/_testlimitedcapi.vcxproj.filters | 1 +
 2 files changed, 2 insertions(+)
diff --git a/PCbuild/_testlimitedcapi.vcxproj b/PCbuild/_testlimitedcapi.vcxproj
index a1409ecf043d2d..846e027e10c7fa 100644
--- a/PCbuild/_testlimitedcapi.vcxproj
+++ b/PCbuild/_testlimitedcapi.vcxproj
@@ -97,6 +97,7 @@
     
     
     
+    
     
     
     
diff --git a/PCbuild/_testlimitedcapi.vcxproj.filters b/PCbuild/_testlimitedcapi.vcxproj.filters
index e27e3171e1e6aa..57be2e2fc5b950 100644
--- a/PCbuild/_testlimitedcapi.vcxproj.filters
+++ b/PCbuild/_testlimitedcapi.vcxproj.filters
@@ -12,6 +12,7 @@
     
     
     
+    
     
     
     
From 2ba5f031ed47eea1058f700e15a52046eb5cf37c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 16:58:29 +0200
Subject: [PATCH 20/20] address Victor's review
---
 Lib/test/test_capi/test_codecs.py | 39 ++++++++++++++++---------------
 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index be64871c67d2ba..85491a89947318 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -528,7 +528,7 @@ class CAPICodecs(unittest.TestCase):
     def setUp(self):
         # Encoding names are normalized internally by converting them
         # to lowercase and their hyphens are replaced by underscores.
-        self.encoding_name = f'codec_reversed_{id(self)}'
+        self.encoding_name = 'test.test_capi.test_codecs.codec_reversed'
         # Make sure that our custom codec is not already registered (that
         # way we know whether we correctly unregistered the custom codec
         # after a test or not).
@@ -658,12 +658,13 @@ def test_codec_decode(self):
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
         self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
 
-        # _codecs.decode() only reports unknown errors policy when they are
-        # used; this is different from PyUnicode_Decode() which checks that
-        # both the encoding and the errors policy are recognized before even
-        # attempting to call the decoder.
-        self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
-        self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a')
+        # _codecs.decode() only reports an unknown error handling name when
+        # the corresponding error handling function is used; this difers
+        # from PyUnicode_Decode() which checks that both the encoding and
+        # the error handling name are recognized before even attempting to
+        # call the decoder.
+        self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '')
+        self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a')
 
         self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
         with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -695,9 +696,9 @@ def test_codec_incremental_encoder(self):
         with self.use_custom_encoder():
             encoding = self.encoding_name
 
-            for policy in ['strict', NULL]:
-                with self.subTest(policy=policy):
-                    encoder = codec_incremental_encoder(encoding, policy)
+            for errors in ['strict', NULL]:
+                with self.subTest(errors):
+                    encoder = codec_incremental_encoder(encoding, errors)
                     self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
 
             with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -709,9 +710,9 @@ def test_codec_incremental_decoder(self):
         with self.use_custom_encoder():
             encoding = self.encoding_name
 
-            for policy in ['strict', NULL]:
-                with self.subTest(policy=policy):
-                    decoder = codec_incremental_decoder(encoding, policy)
+            for errors in ['strict', NULL]:
+                with self.subTest(errors):
+                    decoder = codec_incremental_decoder(encoding, errors)
                     self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
 
             with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -722,9 +723,9 @@ def test_codec_stream_reader(self):
 
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            for policy in ['strict', NULL]:
-                with self.subTest(policy=policy):
-                    writer = codec_stream_reader(encoding, stream, policy)
+            for errors in ['strict', NULL]:
+                with self.subTest(errors):
+                    writer = codec_stream_reader(encoding, stream, errors)
                     self.assertIsInstance(writer, self.codec_info.streamreader)
 
             with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -735,9 +736,9 @@ def test_codec_stream_writer(self):
 
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            for policy in ['strict', NULL]:
-                with self.subTest(policy=policy):
-                    writer = codec_stream_writer(encoding, stream, policy)
+            for errors in ['strict', NULL]:
+                with self.subTest(errors):
+                    writer = codec_stream_writer(encoding, stream, errors)
                     self.assertIsInstance(writer, self.codec_info.streamwriter)
 
             with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):