Skip to content

Commit 25f1cd8

Browse files
lysnikolaou and vstinner
authored and committed
Address feedback; test more characters and refactor _testcapi functions
1 parent d7ed172 commit 25f1cd8

File tree

2 files changed

+27
-69
lines changed

2 files changed

+27
-69
lines changed

Lib/test/test_capi/test_unicode.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,6 +1765,7 @@ def test_tolower(self):
17651765

17661766
# Test unicode character
17671767
self.assertEqual(unicode_tolower("Č"), "č")
1768+
self.assertEqual(unicode_tolower("Σ"), "σ")
17681769

17691770
@support.cpython_only
17701771
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1778,6 +1779,8 @@ def test_toupper(self):
17781779

17791780
# Test unicode character
17801781
self.assertEqual(unicode_toupper("č"), "Č")
1782+
self.assertEqual(unicode_toupper("ß"), "SS")
1783+
self.assertEqual(unicode_toupper("ΐ"), "Ϊ́")
17811784

17821785
@support.cpython_only
17831786
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1788,6 +1791,8 @@ def test_totitle(self):
17881791

17891792
# Test unicode character
17901793
self.assertEqual(unicode_totitle("ł"), "Ł")
1794+
self.assertEqual(unicode_totitle("ß"), "Ss")
1795+
self.assertEqual(unicode_totitle("ΐ"), "Ϊ́")
17911796

17921797
@support.cpython_only
17931798
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -1798,6 +1803,7 @@ def test_tofolded(self):
17981803

17991804
# Test unicode character
18001805
self.assertEqual(unicode_tofolded("Ł"), "ł")
1806+
self.assertEqual(unicode_tofolded("Σ"), "σ")
18011807

18021808
# Test character with no case mapping (returned unchanged)
18031809
self.assertEqual(unicode_tofolded("👍"), "👍")

Modules/_testcapi/unicode.c

Lines changed: 21 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -220,109 +220,61 @@ unicode_copycharacters(PyObject *self, PyObject *args)
220220
return Py_BuildValue("(Nn)", to_copy, copied);
221221
}
222222

223-
/* Test PyUnicode_ToLower() */
224223
static PyObject *
225-
unicode_tolower(PyObject *self, PyObject *arg)
224+
unicode_case_operation(PyObject *str, int (*function)(Py_UCS4, Py_UCS4 *, int), const char *name)
226225
{
227-
if (PyUnicode_GET_LENGTH(arg) != 1) {
228-
PyErr_SetString(PyExc_ValueError, "unicode_tolower only accepts 1-character strings");
226+
if (PyUnicode_GET_LENGTH(str) != 1) {
227+
PyErr_Format(PyExc_ValueError, "%s only accepts 1-character strings", name);
229228
return NULL;
230229
}
231230

232-
Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
231+
Py_UCS4 c = PyUnicode_READ_CHAR(str, 0);
233232

234-
Py_UCS4 lower[3];
235-
int chars = PyUnicode_ToLower(c, lower, Py_ARRAY_LENGTH(lower));
236-
assert(chars >= 1);
233+
Py_UCS4 buf[3];
234+
int chars = function(c, buf, Py_ARRAY_LENGTH(buf));
235+
if (chars <= 0) {
236+
PyErr_BadInternalCall();
237+
return NULL;
238+
}
237239

238240
PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
239241
if (writer == NULL) {
240242
return NULL;
241243
}
242-
if (PyUnicodeWriter_WriteUCS4(writer, lower, chars) < 0) {
244+
if (PyUnicodeWriter_WriteUCS4(writer, buf, chars) < 0) {
243245
PyUnicodeWriter_Discard(writer);
244246
return NULL;
245247
}
246248
return PyUnicodeWriter_Finish(writer);
247249
}
248250

251+
/* Test PyUnicode_ToLower() */
252+
static PyObject *
253+
unicode_tolower(PyObject *self, PyObject *arg)
254+
{
255+
return unicode_case_operation(arg, PyUnicode_ToLower, "unicode_tolower");
256+
}
257+
249258
/* Test PyUnicode_ToUpper() */
250259
static PyObject *
251260
unicode_toupper(PyObject *self, PyObject *arg)
252261
{
253-
if (PyUnicode_GET_LENGTH(arg) != 1) {
254-
PyErr_SetString(PyExc_ValueError, "unicode_toupper only accepts 1-character strings");
255-
return NULL;
256-
}
257-
258-
Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
259-
260-
Py_UCS4 upper[3];
261-
int chars = PyUnicode_ToUpper(c, upper, Py_ARRAY_LENGTH(upper));
262-
assert(chars >= 1);
263-
264-
PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
265-
if (writer == NULL) {
266-
return NULL;
267-
}
268-
if (PyUnicodeWriter_WriteUCS4(writer, upper, chars) < 0) {
269-
PyUnicodeWriter_Discard(writer);
270-
return NULL;
271-
}
272-
return PyUnicodeWriter_Finish(writer);
262+
return unicode_case_operation(arg, PyUnicode_ToUpper, "unicode_toupper");
273263
}
274264

275265

276266
/* Test PyUnicode_ToTitle() */
277267
static PyObject *
278268
unicode_totitle(PyObject *self, PyObject *arg)
279269
{
280-
if (PyUnicode_GET_LENGTH(arg) != 1) {
281-
PyErr_SetString(PyExc_ValueError, "unicode_totitle only accepts 1-character strings");
282-
return NULL;
283-
}
284-
285-
Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
286-
287-
Py_UCS4 title[3];
288-
int chars = PyUnicode_ToTitle(c, title, Py_ARRAY_LENGTH(title));
289-
assert(chars >= 1);
290-
291-
PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
292-
if (writer == NULL) {
293-
return NULL;
294-
}
295-
if (PyUnicodeWriter_WriteUCS4(writer, title, chars) < 0) {
296-
PyUnicodeWriter_Discard(writer);
297-
return NULL;
298-
}
299-
return PyUnicodeWriter_Finish(writer);
270+
return unicode_case_operation(arg, PyUnicode_ToTitle, "unicode_totitle");
300271
}
301272

302273
/* Test PyUnicode_ToFolded() */
303274
static PyObject *
304275
unicode_tofolded(PyObject *self, PyObject *arg)
305276
{
306-
if (PyUnicode_GET_LENGTH(arg) != 1) {
307-
PyErr_SetString(PyExc_ValueError, "unicode_tofolded only accepts 1-character strings");
308-
return NULL;
309-
}
310-
311-
Py_UCS4 c = PyUnicode_READ_CHAR(arg, 0);
312-
313-
Py_UCS4 folded[3];
314-
int chars = PyUnicode_ToFolded(c, folded, Py_ARRAY_LENGTH(folded));
315-
assert(chars >= 1);
316-
317-
PyUnicodeWriter *writer = PyUnicodeWriter_Create(1);
318-
if (writer == NULL) {
319-
return NULL;
320-
}
321-
if (PyUnicodeWriter_WriteUCS4(writer, folded, chars) < 0) {
322-
PyUnicodeWriter_Discard(writer);
323-
return NULL;
324-
}
325-
return PyUnicodeWriter_Finish(writer);
277+
return unicode_case_operation(arg, PyUnicode_ToFolded, "unicode_tofolded");
326278
}
327279

328280

0 commit comments

Comments
 (0)