Skip to content

Commit 1d40499

Browse files
Optimize CPyStr_Lower and CPyStr_Upper for ASCII strings by removing fallback logic and using direct table lookups
1 parent 8065f9c commit 1d40499

File tree

1 file changed

+24
-43
lines changed

1 file changed

+24
-43
lines changed

mypyc/lib-rt/str_ops.c

Lines changed: 24 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -570,20 +570,6 @@ static const unsigned char ascii_upper_table[128] = {
570570
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127
571571
};
572572

573-
// Helper for lower/upper: get the lower/upper code point for a character
574-
static inline Py_UCS4 tolower_ucs4(Py_UCS4 ch) {
575-
if (ch < 128) {
576-
return ascii_lower_table[ch];
577-
}
578-
return Py_UNICODE_TOLOWER(ch);
579-
}
580-
581-
static inline Py_UCS4 toupper_ucs4(Py_UCS4 ch) {
582-
if (ch < 128) {
583-
return ascii_upper_table[ch];
584-
}
585-
return Py_UNICODE_TOUPPER(ch);
586-
}
587573

588574
// Implementation of s.lower()
589575
PyObject *CPyStr_Lower(PyObject *self) {
@@ -593,29 +579,26 @@ PyObject *CPyStr_Lower(PyObject *self) {
593579
int kind = PyUnicode_KIND(self);
594580
void *data = PyUnicode_DATA(self);
595581

596-
// Fast path: check if already all lower
597-
int unchanged = 1;
598-
for (Py_ssize_t i = 0; i < len; i++) {
599-
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
600-
if (tolower_ucs4(ch) != ch) {
601-
unchanged = 0;
602-
break;
603-
}
604-
}
605-
if (unchanged) {
606-
return Py_NewRef(self);
607-
}
608-
609582
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
610583
PyObject *res = PyUnicode_New(len, maxchar);
611-
if (!res)
584+
if (res == NULL)
612585
return NULL;
613586
int res_kind = PyUnicode_KIND(res);
614587
void *res_data = PyUnicode_DATA(res);
615588

589+
// Fast path for ASCII strings
590+
if (PyUnicode_IS_ASCII(self)) {
591+
for (Py_ssize_t i = 0; i < len; i++) {
592+
Py_UCS1 ch = ((Py_UCS1 *)data)[i];
593+
Py_UCS1 lower = ascii_lower_table[ch];
594+
((Py_UCS1 *)res_data)[i] = lower;
595+
}
596+
return res;
597+
}
598+
616599
for (Py_ssize_t i = 0; i < len; i++) {
617600
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
618-
Py_UCS4 lower = tolower_ucs4(ch);
601+
Py_UCS4 lower = Py_UNICODE_TOLOWER(ch);
619602
PyUnicode_WRITE(res_kind, res_data, i, lower);
620603
}
621604
return res;
@@ -629,28 +612,26 @@ PyObject *CPyStr_Upper(PyObject *self) {
629612
int kind = PyUnicode_KIND(self);
630613
void *data = PyUnicode_DATA(self);
631614

632-
int unchanged = 1;
633-
for (Py_ssize_t i = 0; i < len; i++) {
634-
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
635-
if (toupper_ucs4(ch) != ch) {
636-
unchanged = 0;
637-
break;
638-
}
639-
}
640-
if (unchanged) {
641-
return Py_NewRef(self);
642-
}
643-
644615
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
645616
PyObject *res = PyUnicode_New(len, maxchar);
646-
if (!res)
617+
if (res == NULL)
647618
return NULL;
648619
int res_kind = PyUnicode_KIND(res);
649620
void *res_data = PyUnicode_DATA(res);
650621

622+
// Fast path for ASCII strings
623+
if (PyUnicode_IS_ASCII(self)) {
624+
for (Py_ssize_t i = 0; i < len; i++) {
625+
Py_UCS1 ch = ((Py_UCS1 *)data)[i];
626+
Py_UCS1 upper = ascii_upper_table[ch];
627+
((Py_UCS1 *)res_data)[i] = upper;
628+
}
629+
return res;
630+
}
631+
651632
for (Py_ssize_t i = 0; i < len; i++) {
652633
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
653-
Py_UCS4 upper = toupper_ucs4(ch);
634+
Py_UCS4 upper = Py_UNICODE_TOUPPER(ch);
654635
PyUnicode_WRITE(res_kind, res_data, i, upper);
655636
}
656637
return res;

0 commit comments

Comments (0)