python · vstinner · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
@@ -307,6 +307,75 @@ These APIs can be used for fast direct character conversions:
    possible.  This function does not raise exceptions.
 
 
+.. c:function:: Py_ssize_t PyUCS4_ToLower(const Py_UCS4 *str, Py_ssize_t str_size, Py_UCS4 *buffer, Py_ssize_t buf_size)
+
+   Convert the characters of *str* to lower case and store the result in
+   *buffer*, which must be large enough to hold the lowercased form of *str*.
+   Return the number of characters stored. If the result does not fit in
+   *buffer*, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   *str_size*, *buf_size* and the result are the number of UCS-4 characters.
+
+   In Unicode 16.0, any character can be lowercased into a buffer of *buf_size* ``2``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToUpper(const Py_UCS4 *str, Py_ssize_t str_size, Py_UCS4 *buffer, Py_ssize_t buf_size)
+
+   Convert the characters of *str* to upper case and store the result in
+   *buffer*, which must be large enough to hold the uppercased form of *str*.
+   Return the number of characters stored. If the result does not fit in
+   *buffer*, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   *str_size*, *buf_size* and the result are the number of UCS-4 characters.
+
+   In Unicode 16.0, any character can be uppercased into a buffer of *buf_size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToTitle(const Py_UCS4 *str, Py_ssize_t str_size, Py_UCS4 *buffer, Py_ssize_t buf_size)
+
+   Convert the characters of *str* to title case and store the result in
+   *buffer*, which must be large enough to hold the titlecased form of *str*.
+   Return the number of characters stored. If the result does not fit in
+   *buffer*, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   *str_size*, *buf_size* and the result are the number of UCS-4 characters.
+
+   In Unicode 16.0, any character can be titlecased into a buffer of *buf_size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToFolded(const Py_UCS4 *str, Py_ssize_t str_size, Py_UCS4 *buffer, Py_ssize_t buf_size)
+
+   Case-fold the characters of *str* and store the result in *buffer*,
+   which must be large enough to hold the case-folded form of *str*.
+   Return the number of characters stored. If the result does not fit in
+   *buffer*, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   *str_size*, *buf_size* and the result are the number of UCS-4 characters.
+
+   In Unicode 16.0, any character can be foldcased into a buffer of *buf_size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:macro:: PyUCS4_CASE_CONVERSION_BUFFER_SIZE
+
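+   The buffer size, in UCS-4 characters, that is large enough to hold the
+   conversion of any single character by :c:func:`PyUCS4_ToLower`,
+   :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle`, or
+   :c:func:`PyUCS4_ToFolded`. That is, ``3`` for Unicode 16.0. To convert
+   *str_size* characters, a buffer of
+   ``str_size * PyUCS4_CASE_CONVERSION_BUFFER_SIZE`` characters is sufficient.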
+
+   .. versionadded:: next
+
+
 These APIs can be used to work with surrogates:
 
 .. c:function:: int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch)

@@ -713,6 +713,12 @@ unicodedata
 * The Unicode database has been updated to Unicode 17.0.0.
 
 
+unicodedata
+-----------
+
+* The Unicode database has been updated to Unicode 17.0.0.
+
+
 wave
 ----
 

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
@@ -733,6 +733,31 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
     Py_UCS4 ch       /* Unicode character */
     );
 
+/* Case conversion of UCS-4 strings.
+
+   Each function converts the str_size characters of str, stores the result
+   in buf and returns the number of characters stored. If buf (buf_size
+   characters) is too small for the result, ValueError is raised and -1 is
+   returned. */
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToLower(
+    const Py_UCS4 *str,    /* Unicode string */
+    Py_ssize_t str_size,   /* Unicode string size (UCS-4 characters) */
+    Py_UCS4 *buf,          /* Output buffer */
+    Py_ssize_t buf_size);  /* Buffer size (UCS-4 characters) */
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToUpper(
+    const Py_UCS4 *str,    /* Unicode string */
+    Py_ssize_t str_size,   /* Unicode string size (UCS-4 characters) */
+    Py_UCS4 *buf,          /* Output buffer */
+    Py_ssize_t buf_size);  /* Buffer size (UCS-4 characters) */
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToTitle(
+    const Py_UCS4 *str,    /* Unicode string */
+    Py_ssize_t str_size,   /* Unicode string size (UCS-4 characters) */
+    Py_UCS4 *buf,          /* Output buffer */
+    Py_ssize_t buf_size);  /* Buffer size (UCS-4 characters) */
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToFolded(
+    const Py_UCS4 *str,    /* Unicode string */
+    Py_ssize_t str_size,   /* Unicode string size (UCS-4 characters) */
+    Py_UCS4 *buf,          /* Output buffer */
+    Py_ssize_t buf_size);  /* Buffer size (UCS-4 characters) */
+
+
 // Helper array used by Py_UNICODE_ISSPACE().
 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
 
@@ -767,6 +792,8 @@ static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) {
 
 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 
+/* Number of UCS-4 characters sufficient to hold the conversion of any single
+   character by PyUCS4_ToLower(), PyUCS4_ToUpper(), PyUCS4_ToTitle() or
+   PyUCS4_ToFolded(). */
+#define PyUCS4_CASE_CONVERSION_BUFFER_SIZE 3
+
 static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) {
    return (Py_UNICODE_ISALPHA(ch)
            || Py_UNICODE_ISDECIMAL(ch)

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
@@ -15,10 +15,6 @@ extern "C" {
 
 extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
 extern int _PyUnicode_IsXidContinue(Py_UCS4 ch);
-extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
 extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
 extern int _PyUnicode_IsCased(Py_UCS4 ch);
 

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -1,5 +1,6 @@
 import unittest
 import sys
+import string
 from test import support
 from test.support import threading_helper
 
@@ -1753,6 +1754,66 @@ def test_GET_CACHED_HASH(self):
         # impl detail: ASCII string hashes are equal to bytes ones
         self.assertEqual(unicode_GET_CACHED_HASH(obj), hash(content_bytes))
 
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tolower(self):
+        # Test PyUCS4_ToLower() through the _testcapi.unicode_tolower() wrapper
+        from _testcapi import unicode_tolower
+
+        self.assertEqual(unicode_tolower(string.ascii_uppercase),
+                         string.ascii_lowercase)
+
+        # Test non-ASCII characters, alone and after ASCII characters.
+        # A trailing "Σ" is expected to be lowercased to "σ" here.
+        self.assertEqual(unicode_tolower("Č"), "č")
+        self.assertEqual(unicode_tolower("Σ"), "σ")
+        self.assertEqual(unicode_tolower("ABCΣ"), "abcσ")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_toupper(self):
+        from _testcapi import unicode_toupper, unicode_toupper_buffer_too_small
+
+        self.assertEqual(unicode_toupper(string.ascii_lowercase),
+                         string.ascii_uppercase)
+
+        # Test unicode character
+        self.assertEqual(unicode_toupper("č"), "Č")
+        self.assertEqual(unicode_toupper("ß"), "SS")
+        self.assertEqual(unicode_toupper("ΐ"), "Ϊ́")
+        self.assertEqual(unicode_toupper("abcß"), "ABCSS")
+
+        # "ß" uppercases to two characters ("SS"): it cannot fit in a buffer
+        # that only has room for one character per input character
+        with self.assertRaisesRegex(ValueError, "output buffer is too small"):
+            unicode_toupper_buffer_too_small("ß")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_totitle(self):
+        from _testcapi import unicode_totitle
+
+        self.assertEqual(unicode_totitle("t"), "T")
+
+        # Test unicode character
+        self.assertEqual(unicode_totitle("ł"), "Ł")
+        self.assertEqual(unicode_totitle("ß"), "Ss")
+        self.assertEqual(unicode_totitle("ΐ"), "Ϊ́")
+        self.assertEqual(unicode_totitle("abcß"), "ABCSs")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tofolded(self):
+        # Test PyUCS4_ToFolded() through the _testcapi.unicode_tofolded() wrapper
+        from _testcapi import unicode_tofolded
+
+        self.assertEqual(unicode_tofolded("T"), "t")
+
+        # Test non-ASCII characters, alone and mixed with ASCII characters
+        self.assertEqual(unicode_tofolded("Ł"), "ł")
+        self.assertEqual(unicode_tofolded("Σ"), "σ")
+        self.assertEqual(unicode_tofolded("abcΣ"), "abcσ")
+        self.assertEqual(unicode_tofolded("ABCσ"), "abcσ")
+
+        # Test a character without a case mapping: it is returned unchanged
+        self.assertEqual(unicode_tofolded("👍"), "👍")
+
 
 class PyUnicodeWriterTest(unittest.TestCase):
     def create_writer(self, size):

diff --git a/Misc/NEWS.d/next/C_API/2025-07-01-14-56-41.gh-issue-76535.9cwObj.rst b/Misc/NEWS.d/next/C_API/2025-07-01-14-56-41.gh-issue-76535.9cwObj.rst
@@ -0,0 +1 @@
+Make :c:func:`PyUCS4_ToLower`, :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle` and :c:func:`PyUCS4_ToFolded` public.
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
@@ -220,6 +220,81 @@ unicode_copycharacters(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", to_copy, copied);
 }
 
+/* Apply a PyUCS4_To*() case conversion function to a str object.
+
+   str is converted to a temporary UCS-4 array, passed to function together
+   with an output buffer, and the converted characters are returned as a new
+   str object.
+
+   If buf_too_small is zero, the output buffer has room for
+   PyUCS4_CASE_CONVERSION_BUFFER_SIZE characters per input character.
+   Otherwise it only has room for one character per input character, which
+   is used to exercise the "buffer too small" error path.
+
+   Return NULL with an exception set on error. */
+static PyObject *
+unicode_case_operation(PyObject *str,
+                       Py_ssize_t (*function)(const Py_UCS4*, Py_ssize_t, Py_UCS4 *, Py_ssize_t),
+                       int buf_too_small)
+{
+    if (!PyUnicode_Check(str)) {
+        PyErr_Format(PyExc_TypeError, "expected type str, got %T", str);
+        return NULL;
+    }
+    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
+
+    Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(str);
+    if (ucs4 == NULL) {
+        return NULL;
+    }
+
+    Py_ssize_t buf_size;
+    if (!buf_too_small) {
+        buf_size = len * PyUCS4_CASE_CONVERSION_BUFFER_SIZE;
+    }
+    else {
+        buf_size = len * 1;
+    }
+    Py_UCS4 *buf = PyMem_Malloc(buf_size * sizeof(Py_UCS4));
+    if (buf == NULL) {
+        PyMem_Free(ucs4);
+        // PyMem_Malloc() does not set an exception on failure
+        return PyErr_NoMemory();
+    }
+
+    Py_ssize_t chars = function(ucs4, len, buf, buf_size);
+    PyMem_Free(ucs4);
+
+    PyObject *res = NULL;
+    if (chars >= 0) {
+        // The new str object gets its own copy of the characters
+        res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, chars);
+    }
+    // Release the output buffer on both the success and the error paths
+    PyMem_Free(buf);
+    return res;
+}
+
+/* Test PyUCS4_ToLower() with an output buffer of
+   PyUCS4_CASE_CONVERSION_BUFFER_SIZE characters per input character */
+static PyObject *
+unicode_tolower(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUCS4_ToLower, 0);
+}
+
+
+/* Test PyUCS4_ToUpper() with an output buffer of
+   PyUCS4_CASE_CONVERSION_BUFFER_SIZE characters per input character */
+static PyObject *
+unicode_toupper(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUCS4_ToUpper, 0);
+}
+
+/* Test PyUCS4_ToUpper() with an output buffer of only one character per
+   input character, to exercise the "buffer too small" error path */
+static PyObject *
+unicode_toupper_buffer_too_small(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUCS4_ToUpper, 1);
+}
+
+/* Test PyUCS4_ToTitle() */
+static PyObject *
+unicode_totitle(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUCS4_ToTitle, 0);
+}
+
+/* Test PyUCS4_ToFolded() */
+static PyObject *
+unicode_tofolded(PyObject *self, PyObject *arg)
+{
+    return unicode_case_operation(arg, PyUCS4_ToFolded, 0);
+}
+
+
 static PyObject*
 unicode_GET_CACHED_HASH(PyObject *self, PyObject *arg)
 {
@@ -577,6 +652,11 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
     {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
     {"unicode_GET_CACHED_HASH",  unicode_GET_CACHED_HASH,        METH_O},
+    {"unicode_tolower",          unicode_tolower,                METH_O},
+    {"unicode_toupper",          unicode_toupper,                METH_O},
+    {"unicode_toupper_buffer_too_small",    unicode_toupper_buffer_too_small,   METH_O},
+    {"unicode_totitle",          unicode_totitle,                METH_O},
+    {"unicode_tofolded",         unicode_tofolded,               METH_O},
     {NULL},
 };
-Original file line number
+Diff line change
@@ Expand Up / @@ -713,6 +713,12 @@ unicodedata @@
     * The Unicode database has been updated to Unicode 17.0.0.
+    unicodedata
+    -----------
+    * The Unicode database has been updated to Unicode 17.0.0.
     wave
     ----
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Make :c:func:`PyUCS4_ToLower`, :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle` and :c:func:`PyUCS4_ToFolded` public.