Skip to content

Commit 2750549

Browse files
Optimize CPyStr_Lower and CPyStr_Upper for ASCII strings by removing static lookup tables and using direct character conversion
1 parent 1d40499 commit 2750549

File tree

1 file changed

+34
-48
lines changed

1 file changed

+34
-48
lines changed

mypyc/lib-rt/str_ops.c

Lines changed: 34 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -547,55 +547,36 @@ CPyTagged CPyStr_Ord(PyObject *obj) {
547547
return CPY_INT_TAG;
548548
}
549549

550-
// Fast ASCII lower/upper tables
551-
static const unsigned char ascii_lower_table[128] = {
552-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
553-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
554-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
555-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
556-
64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
557-
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
558-
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
559-
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
560-
};
561-
562-
static const unsigned char ascii_upper_table[128] = {
563-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
564-
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
565-
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
566-
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
567-
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
568-
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
569-
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
570-
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127
571-
};
572-
573-
574-
// Implementation of s.lower()
575550
PyObject *CPyStr_Lower(PyObject *self) {
576551
if (PyUnicode_READY(self) == -1)
577552
return NULL;
553+
578554
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
555+
556+
// Fast path: ASCII only
557+
if (PyUnicode_IS_ASCII(self)) {
558+
PyObject *res = PyUnicode_New(len, 127);
559+
if (res == NULL)
560+
return NULL;
561+
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
562+
Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
563+
for (Py_ssize_t i = 0; i < len; i++) {
564+
res_data[i] = Py_TOLOWER((unsigned char) data[i]);
565+
}
566+
return res;
567+
}
568+
569+
// General Unicode path
579570
int kind = PyUnicode_KIND(self);
580571
void *data = PyUnicode_DATA(self);
581-
582572
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
583573
PyObject *res = PyUnicode_New(len, maxchar);
584574
if (res == NULL)
585575
return NULL;
586576
int res_kind = PyUnicode_KIND(res);
587577
void *res_data = PyUnicode_DATA(res);
588578

589-
// Fast path for ASCII strings
590-
if (PyUnicode_IS_ASCII(self)) {
591-
for (Py_ssize_t i = 0; i < len; i++) {
592-
Py_UCS1 ch = ((Py_UCS1 *)data)[i];
593-
Py_UCS1 lower = ascii_lower_table[ch];
594-
((Py_UCS1 *)res_data)[i] = lower;
595-
}
596-
return res;
597-
}
598-
579+
// Unified loop for all Unicode kinds
599580
for (Py_ssize_t i = 0; i < len; i++) {
600581
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
601582
Py_UCS4 lower = Py_UNICODE_TOLOWER(ch);
@@ -604,31 +585,36 @@ PyObject *CPyStr_Lower(PyObject *self) {
604585
return res;
605586
}
606587

607-
// Implementation of s.upper()
608588
PyObject *CPyStr_Upper(PyObject *self) {
609589
if (PyUnicode_READY(self) == -1)
610590
return NULL;
591+
611592
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
593+
594+
// Fast path: ASCII only
595+
if (PyUnicode_IS_ASCII(self)) {
596+
PyObject *res = PyUnicode_New(len, 127);
597+
if (res == NULL)
598+
return NULL;
599+
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
600+
Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
601+
for (Py_ssize_t i = 0; i < len; i++) {
602+
res_data[i] = Py_TOUPPER((unsigned char) data[i]);
603+
}
604+
return res;
605+
}
606+
607+
// General Unicode path
612608
int kind = PyUnicode_KIND(self);
613609
void *data = PyUnicode_DATA(self);
614-
615610
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
616611
PyObject *res = PyUnicode_New(len, maxchar);
617612
if (res == NULL)
618613
return NULL;
619614
int res_kind = PyUnicode_KIND(res);
620615
void *res_data = PyUnicode_DATA(res);
621616

622-
// Fast path for ASCII strings
623-
if (PyUnicode_IS_ASCII(self)) {
624-
for (Py_ssize_t i = 0; i < len; i++) {
625-
Py_UCS1 ch = ((Py_UCS1 *)data)[i];
626-
Py_UCS1 upper = ascii_upper_table[ch];
627-
((Py_UCS1 *)res_data)[i] = upper;
628-
}
629-
return res;
630-
}
631-
617+
// Unified loop for all Unicode kinds
632618
for (Py_ssize_t i = 0; i < len; i++) {
633619
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
634620
Py_UCS4 upper = Py_UNICODE_TOUPPER(ch);

0 commit comments

Comments
 (0)