Skip to content

Commit ce6a3a6

Browse files
lysnikolaou authored and vstinner committed
Address feedback; Rename to PyUCS4_*, define macro and test small buffer case
1 parent 4163898 commit ce6a3a6

File tree

7 files changed

+51
-32
lines changed

7 files changed

+51
-32
lines changed

Doc/c-api/unicode.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ These APIs can be used for fast direct character conversions:
307307
possible. This function does not raise exceptions.
308308
309309
310-
.. c:function:: Py_ssize_t PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
310+
.. c:function:: Py_ssize_t PyUCS4_ToLower(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
311311
312312
Convert *ch* to lower case, store result in *buffer*, which should be
313313
able to hold as many characters as needed for *ch* to be lower cased
@@ -318,7 +318,7 @@ These APIs can be used for fast direct character conversions:
318318
.. versionadded:: next
319319
320320
321-
.. c:function:: Py_ssize_t PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
321+
.. c:function:: Py_ssize_t PyUCS4_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
322322
323323
Convert *ch* to upper case, store result in *buffer*, which should be
324324
able to hold as many characters as needed for *ch* to be upper cased
@@ -329,7 +329,7 @@ These APIs can be used for fast direct character conversions:
329329
.. versionadded:: next
330330
331331
332-
.. c:function:: Py_ssize_t PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
332+
.. c:function:: Py_ssize_t PyUCS4_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
333333
334334
Convert *ch* to title case, store result in *buffer*, which should be
335335
able to hold as many characters as needed for *ch* to be title cased
@@ -340,7 +340,7 @@ These APIs can be used for fast direct character conversions:
340340
.. versionadded:: next
341341
342342
343-
.. c:function:: Py_ssize_t PyUnicode_ToFolded(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
343+
.. c:function:: Py_ssize_t PyUCS4_ToFolded(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
344344
345345
Foldcase *ch*, store result in *buffer*, which should be
346346
able to hold as many characters as needed for *ch* to be foldcased

Include/cpython/unicodeobject.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -733,25 +733,25 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
733733
Py_UCS4 ch /* Unicode character */
734734
);
735735

736-
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToLower(
736+
PyAPI_FUNC(Py_ssize_t) PyUCS4_ToLower(
737737
Py_UCS4 ch, /* Unicode character */
738738
Py_UCS4 *res, /* Output buffer */
739739
Py_ssize_t size /* Buffer size */
740740
);
741741

742-
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToUpper(
742+
PyAPI_FUNC(Py_ssize_t) PyUCS4_ToUpper(
743743
Py_UCS4 ch, /* Unicode character */
744744
Py_UCS4 *res, /* Output buffer */
745745
Py_ssize_t size /* Buffer size */
746746
);
747747

748-
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToTitle(
748+
PyAPI_FUNC(Py_ssize_t) PyUCS4_ToTitle(
749749
Py_UCS4 ch, /* Unicode character */
750750
Py_UCS4 *res, /* Output buffer */
751751
Py_ssize_t size /* Buffer size */
752752
);
753753

754-
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToFolded(
754+
PyAPI_FUNC(Py_ssize_t) PyUCS4_ToFolded(
755755
Py_UCS4 ch, /* Unicode character */
756756
Py_UCS4 *res, /* Output buffer */
757757
Py_ssize_t size /* Buffer size */
@@ -792,6 +792,8 @@ static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) {
792792

793793
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
794794

795+
#define PyUCS4_CASE_CONVERSION_BUFFER_SIZE 3
796+
795797
static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) {
796798
return (Py_UNICODE_ISALPHA(ch)
797799
|| Py_UNICODE_ISDECIMAL(ch)

Lib/test/test_capi/test_unicode.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1771,7 +1771,7 @@ def test_tolower(self):
17711771
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
17721772
def test_toupper(self):
17731773
import string
1774-
from _testcapi import unicode_toupper
1774+
from _testcapi import unicode_toupper, unicode_toupper_buffer_too_small
17751775

17761776
for i, c in enumerate(string.ascii_lowercase):
17771777
with self.subTest(c):
@@ -1782,6 +1782,10 @@ def test_toupper(self):
17821782
self.assertEqual(unicode_toupper("ß"), "SS")
17831783
self.assertEqual(unicode_toupper("ΐ"), "Ϊ́")
17841784

1785+
# Test a character whose upper-case form does not fit in a 1-character buffer
1786+
with self.assertRaisesRegex(ValueError, "output buffer is too small"):
1787+
unicode_toupper_buffer_too_small("ß")
1788+
17851789
@support.cpython_only
17861790
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
17871791
def test_totitle(self):
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Make :c:func:`PyUnicode_ToLower`, :c:func:`PyUnicode_ToUpper`, :c:func:`PyUnicode_ToTitle` and :c:func:`PyUnicode_ToFolded` public.
1+
Make :c:func:`PyUCS4_ToLower`, :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle` and :c:func:`PyUCS4_ToFolded` public.

Modules/_testcapi/unicode.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ unicode_copycharacters(PyObject *self, PyObject *args)
221221
}
222222

223223
static PyObject *
224-
unicode_case_operation(PyObject *str, Py_ssize_t (*function)(Py_UCS4, Py_UCS4 *, Py_ssize_t))
224+
unicode_case_operation(PyObject *str, Py_ssize_t (*function)(Py_UCS4, Py_UCS4 *, Py_ssize_t),
225+
Py_UCS4 *buf, Py_ssize_t size)
225226
{
226227
if (!PyUnicode_Check(str)) {
227228
PyErr_Format(PyExc_TypeError, "expect str type, got %T", str);
@@ -235,42 +236,53 @@ unicode_case_operation(PyObject *str, Py_ssize_t (*function)(Py_UCS4, Py_UCS4 *,
235236

236237
Py_UCS4 c = PyUnicode_READ_CHAR(str, 0);
237238

238-
Py_UCS4 buf[3];
239-
Py_ssize_t chars = function(c, buf, Py_ARRAY_LENGTH(buf));
239+
Py_ssize_t chars = function(c, buf, size);
240240
if (chars < 0) {
241241
return NULL;
242242
}
243243

244244
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, chars);
245245
}
246246

247-
/* Test PyUnicode_ToLower() */
247+
/* Test PyUCS4_ToLower() */
248248
static PyObject *
249249
unicode_tolower(PyObject *self, PyObject *arg)
250250
{
251-
return unicode_case_operation(arg, PyUnicode_ToLower);
251+
Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
252+
return unicode_case_operation(arg, PyUCS4_ToLower, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
252253
}
253254

254-
/* Test PyUnicode_ToUpper() */
255+
256+
/* Test PyUCS4_ToUpper() */
255257
static PyObject *
256258
unicode_toupper(PyObject *self, PyObject *arg)
257259
{
258-
return unicode_case_operation(arg, PyUnicode_ToUpper);
260+
Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
261+
return unicode_case_operation(arg, PyUCS4_ToUpper, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
259262
}
260263

264+
/* Test PyUCS4_ToUpper() with a small buffer */
265+
static PyObject *
266+
unicode_toupper_buffer_too_small(PyObject *self, PyObject *arg)
267+
{
268+
Py_UCS4 buf;
269+
return unicode_case_operation(arg, PyUCS4_ToUpper, &buf, 1);
270+
}
261271

262-
/* Test PyUnicode_ToLower() */
272+
/* Test PyUCS4_ToTitle() */
263273
static PyObject *
264274
unicode_totitle(PyObject *self, PyObject *arg)
265275
{
266-
return unicode_case_operation(arg, PyUnicode_ToTitle);
276+
Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
277+
return unicode_case_operation(arg, PyUCS4_ToTitle, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
267278
}
268279

269-
/* Test PyUnicode_ToLower() */
280+
/* Test PyUCS4_ToFolded() */
270281
static PyObject *
271282
unicode_tofolded(PyObject *self, PyObject *arg)
272283
{
273-
return unicode_case_operation(arg, PyUnicode_ToFolded);
284+
Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
285+
return unicode_case_operation(arg, PyUCS4_ToFolded, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
274286
}
275287

276288

@@ -633,6 +645,7 @@ static PyMethodDef TestMethods[] = {
633645
{"unicode_GET_CACHED_HASH", unicode_GET_CACHED_HASH, METH_O},
634646
{"unicode_tolower", unicode_tolower, METH_O},
635647
{"unicode_toupper", unicode_toupper, METH_O},
648+
{"unicode_toupper_buffer_too_small", unicode_toupper_buffer_too_small, METH_O},
636649
{"unicode_totitle", unicode_totitle, METH_O},
637650
{"unicode_tofolded", unicode_tofolded, METH_O},
638651
{NULL},

Objects/unicodectype.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
198198
return ch + ctype->lower;
199199
}
200200

201-
Py_ssize_t PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
201+
Py_ssize_t PyUCS4_ToLower(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
202202
{
203203
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
204204

@@ -224,7 +224,7 @@ Py_ssize_t PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
224224
return 1;
225225
}
226226

227-
Py_ssize_t PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
227+
Py_ssize_t PyUCS4_ToTitle(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
228228
{
229229
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
230230

@@ -250,7 +250,7 @@ Py_ssize_t PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
250250
return 1;
251251
}
252252

253-
Py_ssize_t PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
253+
Py_ssize_t PyUCS4_ToUpper(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
254254
{
255255
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
256256

@@ -276,7 +276,7 @@ Py_ssize_t PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
276276
return 1;
277277
}
278278

279-
Py_ssize_t PyUnicode_ToFolded(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
279+
Py_ssize_t PyUCS4_ToFolded(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
280280
{
281281
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
282282

@@ -294,7 +294,7 @@ Py_ssize_t PyUnicode_ToFolded(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
294294
return n;
295295
}
296296

297-
return PyUnicode_ToLower(ch, res, size);
297+
return PyUCS4_ToLower(ch, res, size);
298298
}
299299

300300
int _PyUnicode_IsCased(Py_UCS4 ch)

Objects/unicodeobject.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10003,7 +10003,7 @@ lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
1000310003
mapped[0] = handle_capital_sigma(kind, data, length, i);
1000410004
return 1;
1000510005
}
10006-
return PyUnicode_ToLower(c, mapped, mapped_size);
10006+
return PyUCS4_ToLower(c, mapped, mapped_size);
1000710007
}
1000810008

1000910009
static Py_ssize_t
@@ -10013,7 +10013,7 @@ do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UC
1001310013
Py_UCS4 c, mapped[3];
1001410014

1001510015
c = PyUnicode_READ(kind, data, 0);
10016-
n_res = PyUnicode_ToTitle(c, mapped, Py_ARRAY_LENGTH(mapped));
10016+
n_res = PyUCS4_ToTitle(c, mapped, Py_ARRAY_LENGTH(mapped));
1001710017
assert(n_res >= 1);
1001810018
for (j = 0; j < n_res; j++) {
1001910019
*maxchar = Py_MAX(*maxchar, mapped[j]);
@@ -10042,7 +10042,7 @@ do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
1004210042
n_res = lower_ucs4(kind, data, length, i, c, mapped, Py_ARRAY_LENGTH(mapped));
1004310043
}
1004410044
else if (Py_UNICODE_ISLOWER(c)) {
10045-
n_res = PyUnicode_ToUpper(c, mapped, Py_ARRAY_LENGTH(mapped));
10045+
n_res = PyUCS4_ToUpper(c, mapped, Py_ARRAY_LENGTH(mapped));
1004610046
}
1004710047
else {
1004810048
n_res = 1;
@@ -10069,7 +10069,7 @@ do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
1006910069
if (lower)
1007010070
n_res = lower_ucs4(kind, data, length, i, c, mapped, Py_ARRAY_LENGTH(mapped));
1007110071
else
10072-
n_res = PyUnicode_ToUpper(c, mapped, Py_ARRAY_LENGTH(mapped));
10072+
n_res = PyUCS4_ToUpper(c, mapped, Py_ARRAY_LENGTH(mapped));
1007310073
assert(n_res >= 1);
1007410074
for (j = 0; j < n_res; j++) {
1007510075
*maxchar = Py_MAX(*maxchar, mapped[j]);
@@ -10099,7 +10099,7 @@ do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
1009910099
for (i = 0; i < length; i++) {
1010010100
Py_UCS4 c = PyUnicode_READ(kind, data, i);
1010110101
Py_UCS4 mapped[3];
10102-
Py_ssize_t j, n_res = PyUnicode_ToFolded(c, mapped, Py_ARRAY_LENGTH(mapped));
10102+
Py_ssize_t j, n_res = PyUCS4_ToFolded(c, mapped, Py_ARRAY_LENGTH(mapped));
1010310103
assert(n_res >= 1);
1010410104
for (j = 0; j < n_res; j++) {
1010510105
*maxchar = Py_MAX(*maxchar, mapped[j]);
@@ -10124,7 +10124,7 @@ do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *m
1012410124
if (previous_is_cased)
1012510125
n_res = lower_ucs4(kind, data, length, i, c, mapped, Py_ARRAY_LENGTH(mapped));
1012610126
else
10127-
n_res = PyUnicode_ToTitle(c, mapped, Py_ARRAY_LENGTH(mapped));
10127+
n_res = PyUCS4_ToTitle(c, mapped, Py_ARRAY_LENGTH(mapped));
1012810128
assert(n_res >= 1);
1012910129
for (j = 0; j < n_res; j++) {
1013010130
*maxchar = Py_MAX(*maxchar, mapped[j]);

0 commit comments

Comments
 (0)