Address feedback; test more characters and refactor _testcapi functions

lysnikolaou · vstinner · commit 25f1cd808c8e · 2025-09-25T18:00:02.000+02:00
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -1765,6 +1765,7 @@ def test_tolower(self):
 
         # Test unicode character
         self.assertEqual(unicode_tolower("Č"), "č")
+        self.assertEqual(unicode_tolower("Σ"), "σ")
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1778,6 +1779,8 @@ def test_toupper(self):
 
         # Test unicode character
         self.assertEqual(unicode_toupper("č"), "Č")
+        self.assertEqual(unicode_toupper("ß"), "SS")
+        self.assertEqual(unicode_toupper("ΐ"), "Ϊ́")
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1788,6 +1791,8 @@ def test_totitle(self):
 
         # Test unicode character
         self.assertEqual(unicode_totitle("ł"), "Ł")
+        self.assertEqual(unicode_totitle("ß"), "Ss")
+        self.assertEqual(unicode_totitle("ΐ"), "Ϊ́")
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1798,6 +1803,7 @@ def test_tofolded(self):
 
         # Test unicode character
         self.assertEqual(unicode_tofolded("Ł"), "ł")
+        self.assertEqual(unicode_tofolded("Σ"), "σ")
 
         # Test case-ignorable character
         self.assertEqual(unicode_tofolded("👍"), "👍")
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
@@ -220,109 +220,61 @@ unicode_copycharacters(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", to_copy, copied);
 }
 
-/* Test PyUnicode_ToLower() */
 static PyObject *
-unicode_tolower(PyObject *self, PyObject *arg)
+unicode_case_operation(PyObject *str, int (*function)(Py_UCS4, Py_UCS4 *, int), const char *name)
 {
-    if (PyUnicode_GET_LENGTH(arg) != 1) {
-        PyErr_SetString(PyExc_ValueError, "unicode_tolower only accepts 1-character strings");
+    if (PyUnicode_GET_LENGTH(str) != 1) {
+        PyErr_Format(PyExc_ValueError, "%s only accepts 1-character strings", name);
         return NULL;
     }
 
-    Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
+    Py_UCS4 c = PyUnicode_READ_CHAR(str, 0);
 
-    Py_UCS4 lower[3];
-    int chars = PyUnicode_ToLower(c, lower, Py_ARRAY_LENGTH(lower));
-    assert(chars >= 1);
+    Py_UCS4 buf[3];
+    int chars = function(c, buf, Py_ARRAY_LENGTH(buf));
+    if (chars <= 0) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
 
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
     if (writer == NULL) {
         return NULL;
     }
-    if (PyUnicodeWriter_WriteUCS4(writer, lower, chars) < 0) {
+    if (PyUnicodeWriter_WriteUCS4(writer, buf, chars) < 0) {
         PyUnicodeWriter_Discard(writer);
         return NULL;
     }
     return PyUnicodeWriter_Finish(writer);
 }
 
+/* Test PyUnicode_ToLower() */
+static PyObject *
+unicode_tolower(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUnicode_ToLower, "unicode_tolower");
+}
+
 /* Test PyUnicode_ToUpper() */
 static PyObject *
 unicode_toupper(PyObject *self, PyObject *arg)
 {
-    if (PyUnicode_GET_LENGTH(arg) != 1) {
-        PyErr_SetString(PyExc_ValueError, "unicode_toupper only accepts 1-character strings");
-        return NULL;
-    }
-
-    Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
-
-    Py_UCS4 upper[3];
-    int chars = PyUnicode_ToUpper(c, upper, Py_ARRAY_LENGTH(upper));
-    assert(chars >= 1);
-
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
-    if (writer == NULL) {
-        return NULL;
-    }
-    if (PyUnicodeWriter_WriteUCS4(writer, upper, chars) < 0) {
-        PyUnicodeWriter_Discard(writer);
-        return NULL;
-    }
-    return PyUnicodeWriter_Finish(writer);
+    return unicode_case_operation(arg, PyUnicode_ToUpper, "unicode_toupper");
 }
 
 
-/* Test PyUnicode_ToLower() */
+/* Test PyUnicode_ToTitle() */
 static PyObject *
 unicode_totitle(PyObject *self, PyObject *arg)
 {
-    if (PyUnicode_GET_LENGTH(arg) != 1) {
-        PyErr_SetString(PyExc_ValueError, "unicode_totitle only accepts 1-character strings");
-        return NULL;
-    }
-
-    Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
-
-    Py_UCS4 title[3];
-    int chars = PyUnicode_ToTitle(c, title, Py_ARRAY_LENGTH(title));
-    assert(chars >= 1);
-
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
-    if (writer == NULL) {
-        return NULL;
-    }
-    if (PyUnicodeWriter_WriteUCS4(writer, title, chars) < 0) {
-        PyUnicodeWriter_Discard(writer);
-        return NULL;
-    }
-    return PyUnicodeWriter_Finish(writer);
+    return unicode_case_operation(arg, PyUnicode_ToTitle, "unicode_totitle");
 }
 
-/* Test PyUnicode_ToLower() */
+/* Test PyUnicode_ToFolded() */
 static PyObject *
 unicode_tofolded(PyObject *self, PyObject *arg)
 {
-    if (PyUnicode_GET_LENGTH(arg) != 1) {
-        PyErr_SetString(PyExc_ValueError, "unicode_tofolded only accepts 1-character strings");
-        return NULL;
-    }
-
-    Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
-
-    Py_UCS4 folded[3];
-    int chars = PyUnicode_ToFolded(c, folded, Py_ARRAY_LENGTH(folded));
-    assert(chars >= 1);
-
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
-    if (writer == NULL) {
-        return NULL;
-    }
-    if (PyUnicodeWriter_WriteUCS4(writer, folded, chars) < 0) {
-        PyUnicodeWriter_Discard(writer);
-        return NULL;
-    }
-    return PyUnicodeWriter_Finish(writer);
+    return unicode_case_operation(arg, PyUnicode_ToFolded, "unicode_tofolded");
 }