update tests to reflect user errors

picnixz · picnixz · commit 4f474ddc570b · 2024-09-25T18:13:07.000+02:00
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
@@ -1,6 +1,7 @@
 import codecs
 import contextlib
 import io
+import re
 import sys
 import unittest
 import unittest.mock as mock
@@ -10,6 +11,7 @@
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
 
 NULL = None
+BAD_ARGUMENT = re.escape('bad argument type for built-in operation')
 
 
 class CAPIUnicodeTest(unittest.TestCase):
@@ -635,7 +637,8 @@ def test_codec_encode(self):
         self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
 
         self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
-        # CRASHES encode('a', NULL, 'strict')
+        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+            encode('a', NULL, 'strict')
 
     def test_codec_decode(self):
         decode = _testcapi.codec_decode
@@ -650,46 +653,90 @@ def test_codec_decode(self):
         self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
         self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
 
-        # _codecs.decode only reports unknown errors policy when they are
-        # used (it has a fast path for empty bytes); this is different from
-        # PyUnicode_Decode which checks that both the encoding and the errors
-        # policy are recognized.
+        # _codecs.decode() only reports an unknown errors policy when it is
+        # used; this is different from PyUnicode_Decode(), which checks that
+        # both the encoding and the errors policy are recognized before even
+        # attempting to call the decoder.
         self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
+        self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a')
 
         self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
-        # CRASHES decode(b, NULL, 'strict')
+        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+            decode(b, NULL, 'strict')
 
     def test_codec_encoder(self):
+        codec_encoder = _testcapi.codec_encoder
+
         with self.use_custom_encoder():
-            encoder = _testcapi.codec_encoder(self.encoding_name)
+            encoder = codec_encoder(self.encoding_name)
             self.assertIs(encoder, self.codec_info.encode)
 
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_encoder(NULL)
+
     def test_codec_decoder(self):
+        codec_decoder = _testcapi.codec_decoder
+
         with self.use_custom_encoder():
-            decoder = _testcapi.codec_decoder(self.encoding_name)
+            decoder = codec_decoder(self.encoding_name)
             self.assertIs(decoder, self.codec_info.decode)
 
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_decoder(NULL)
+
     def test_codec_incremental_encoder(self):
+        codec_incremental_encoder = _testcapi.codec_incremental_encoder
+
         with self.use_custom_encoder():
-            encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict')
-            self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+            encoding = self.encoding_name
+
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    encoder = codec_incremental_encoder(encoding, policy)
+                    self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_incremental_encoder(NULL, 'strict')
 
     def test_codec_incremental_decoder(self):
+        codec_incremental_decoder = _testcapi.codec_incremental_decoder
+
         with self.use_custom_encoder():
-            decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict')
-            self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+            encoding = self.encoding_name
+
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    decoder = codec_incremental_decoder(encoding, policy)
+                    self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_incremental_decoder(NULL, 'strict')
 
     def test_codec_stream_reader(self):
+        codec_stream_reader = _testcapi.codec_stream_reader
+
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            reader = _testcapi.codec_stream_reader(encoding, stream, 'strict')
-            self.assertIsInstance(reader, self.codec_info.streamreader)
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    writer = codec_stream_reader(encoding, stream, policy)
+                    self.assertIsInstance(writer, self.codec_info.streamreader)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_stream_reader(NULL, stream, 'strict')
 
     def test_codec_stream_writer(self):
+        codec_stream_writer = _testcapi.codec_stream_writer
+
         with self.use_custom_encoder():
             encoding, stream = self.encoding_name, io.StringIO()
-            writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
-            self.assertIsInstance(writer, self.codec_info.streamwriter)
+            for policy in ['strict', NULL]:
+                with self.subTest(policy=policy):
+                    writer = codec_stream_writer(encoding, stream, policy)
+                    self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+                codec_stream_writer(NULL, stream, 'strict')
 
 
 class CAPICodecErrors(unittest.TestCase):
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
@@ -1,5 +1,13 @@
 #include "parts.h"
 
+/*
+ * The Codecs C API assumes that 'encoding' is not NULL; otherwise,
+ * it uses PyErr_BadArgument() to set a TypeError exception.
+ *
+ * In this file, we allow calling the functions with None
+ * standing for NULL in order to explicitly check this behaviour.
+ */
+
 // === Codecs registration and un-registration ================================
 
 static PyObject *
@@ -23,8 +31,8 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
 static PyObject *
 codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
@@ -36,9 +44,9 @@ static PyObject *
 codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_Encode(input, encoding, errors);
@@ -48,9 +56,9 @@ static PyObject *
 codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 {
     PyObject *input;
-    const char *encoding;   // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     const char *errors;     // can be NULL
-    if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+    if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_Decode(input, encoding, errors);
@@ -59,8 +67,8 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_Encoder(encoding);
@@ -69,8 +77,8 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
-    if (!PyArg_ParseTuple(args, "s", &encoding)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    if (!PyArg_ParseTuple(args, "z", &encoding)) {
         return NULL;
     }
     return PyCodec_Decoder(encoding);
@@ -79,9 +87,9 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_IncrementalEncoder(encoding, errors);
@@ -90,9 +98,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;   // should not be NULL
-    const char *errors;     // should not be NULL
-    if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+    const char *encoding;   // should not be NULL (see top-file comment)
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
         return NULL;
     }
     return PyCodec_IncrementalDecoder(encoding, errors);
@@ -101,10 +109,10 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     PyObject *stream;
-    const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
     return PyCodec_StreamReader(encoding, stream, errors);
@@ -113,10 +121,10 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // should not be NULL (see top-file comment)
     PyObject *stream;
-    const char *errors;    // should not be NULL
-    if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+    const char *errors;     // can be NULL
+    if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
         return NULL;
     }
     return PyCodec_StreamWriter(encoding, stream, errors);
@@ -127,7 +135,7 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
 static PyObject *
 codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
 {
-    const char *encoding;  // should not be NULL
+    const char *encoding;   // must not be NULL
     PyObject *error;
     if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
         return NULL;