Skip to content

Commit d4ef31a

Browse files
Add str.lower() and str.upper() primitives
1 parent 1bf186c commit d4ef31a

File tree

6 files changed

+180
-1
lines changed

6 files changed

+180
-1
lines changed

mypyc/lib-rt/CPy.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,8 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors);
756756
Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start);
757757
Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end);
758758
CPyTagged CPyStr_Ord(PyObject *obj);
759+
PyObject *CPyStr_Lower(PyObject *self);
760+
PyObject *CPyStr_Upper(PyObject *self);
759761

760762

761763
// Bytes operations

mypyc/lib-rt/str_ops.c

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,3 +546,122 @@ CPyTagged CPyStr_Ord(PyObject *obj) {
546546
PyExc_TypeError, "ord() expected a character, but a string of length %zd found", s);
547547
return CPY_INT_TAG;
548548
}
549+
550+
// Fast ASCII lower/upper tables, indexed by code point (0-127).
//
// Lowercase table: 'A'-'Z' (65-90) map to 'a'-'z' (97-122); every other
// entry maps to itself.
static const unsigned char ascii_lower_table[128] = {
      0,   1,   2,   3,   4,   5,   6,   7,  // 0x00
      8,   9,  10,  11,  12,  13,  14,  15,  // 0x08
     16,  17,  18,  19,  20,  21,  22,  23,  // 0x10
     24,  25,  26,  27,  28,  29,  30,  31,  // 0x18
     32,  33,  34,  35,  36,  37,  38,  39,  // 0x20
     40,  41,  42,  43,  44,  45,  46,  47,  // 0x28
     48,  49,  50,  51,  52,  53,  54,  55,  // 0x30
     56,  57,  58,  59,  60,  61,  62,  63,  // 0x38
     64, 'a', 'b', 'c', 'd', 'e', 'f', 'g',  // 0x40: '@', then 'A'-'G' folded
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',  // 0x48: 'H'-'O' folded
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  // 0x50: 'P'-'W' folded
    'x', 'y', 'z',  91,  92,  93,  94,  95,  // 0x58: 'X'-'Z' folded, then '[' .. '_'
     96, 'a', 'b', 'c', 'd', 'e', 'f', 'g',  // 0x60: '`', then 'a'-'g'
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',  // 0x68
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  // 0x70
    'x', 'y', 'z', 123, 124, 125, 126, 127   // 0x78: 'x'-'z', then '{' .. DEL
};
561+
562+
// Uppercase table, indexed by ASCII code point (0-127): 'a'-'z' (97-122) map
// to 'A'-'Z' (65-90); every other entry maps to itself.
static const unsigned char ascii_upper_table[128] = {
      0,   1,   2,   3,   4,   5,   6,   7,  // 0x00
      8,   9,  10,  11,  12,  13,  14,  15,  // 0x08
     16,  17,  18,  19,  20,  21,  22,  23,  // 0x10
     24,  25,  26,  27,  28,  29,  30,  31,  // 0x18
     32,  33,  34,  35,  36,  37,  38,  39,  // 0x20
     40,  41,  42,  43,  44,  45,  46,  47,  // 0x28
     48,  49,  50,  51,  52,  53,  54,  55,  // 0x30
     56,  57,  58,  59,  60,  61,  62,  63,  // 0x38
     64, 'A', 'B', 'C', 'D', 'E', 'F', 'G',  // 0x40: '@', then 'A'-'G'
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',  // 0x48
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',  // 0x50
    'X', 'Y', 'Z',  91,  92,  93,  94,  95,  // 0x58: 'X'-'Z', then '[' .. '_'
     96, 'A', 'B', 'C', 'D', 'E', 'F', 'G',  // 0x60: '`', then 'a'-'g' folded
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',  // 0x68: 'h'-'o' folded
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',  // 0x70: 'p'-'w' folded
    'X', 'Y', 'Z', 123, 124, 125, 126, 127   // 0x78: 'x'-'z' folded, then '{' .. DEL
};
572+
573+
// Helper for lower/upper: get the lower/upper code point for a character
574+
static inline Py_UCS4 tolower_ucs4(Py_UCS4 ch) {
575+
if (ch < 128) {
576+
return ascii_lower_table[ch];
577+
}
578+
#ifdef Py_UNICODE_TOLOWER
579+
return Py_UNICODE_TOLOWER(ch);
580+
#else
581+
// fallback: no-op for non-ASCII if macro is unavailable
582+
return ch;
583+
#endif
584+
}
585+
586+
// Helper for upper: map one code point to one uppercase code point.
//
// ASCII goes through ascii_upper_table. Anything else uses
// Py_UNICODE_TOUPPER when that macro is defined, and is returned unchanged
// otherwise. The result is always a single code point, so case conversions
// that produce more than one code point cannot be expressed here.
static inline Py_UCS4 toupper_ucs4(Py_UCS4 ch) {
    if (ch >= 128) {
#ifdef Py_UNICODE_TOUPPER
        ch = Py_UNICODE_TOUPPER(ch);
#endif
        // Without the macro, non-ASCII code points pass through untouched.
        return ch;
    }
    return ascii_upper_table[ch];
}
597+
598+
// Implementation of s.lower()
599+
PyObject *CPyStr_Lower(PyObject *self) {
600+
if (PyUnicode_READY(self) == -1)
601+
return NULL;
602+
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
603+
int kind = PyUnicode_KIND(self);
604+
void *data = PyUnicode_DATA(self);
605+
606+
// Fast path: check if already all lower
607+
int unchanged = 1;
608+
for (Py_ssize_t i = 0; i < len; i++) {
609+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
610+
if (tolower_ucs4(ch) != ch) {
611+
unchanged = 0;
612+
break;
613+
}
614+
}
615+
if (unchanged) {
616+
return Py_NewRef(self);
617+
}
618+
619+
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
620+
PyObject *res = PyUnicode_New(len, maxchar);
621+
if (!res)
622+
return NULL;
623+
int res_kind = PyUnicode_KIND(res);
624+
void *res_data = PyUnicode_DATA(res);
625+
626+
for (Py_ssize_t i = 0; i < len; i++) {
627+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
628+
Py_UCS4 lower = tolower_ucs4(ch);
629+
PyUnicode_WRITE(res_kind, res_data, i, lower);
630+
}
631+
return res;
632+
}
633+
634+
// Implementation of s.upper()
635+
PyObject *CPyStr_Upper(PyObject *self) {
636+
if (PyUnicode_READY(self) == -1)
637+
return NULL;
638+
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
639+
int kind = PyUnicode_KIND(self);
640+
void *data = PyUnicode_DATA(self);
641+
642+
int unchanged = 1;
643+
for (Py_ssize_t i = 0; i < len; i++) {
644+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
645+
if (toupper_ucs4(ch) != ch) {
646+
unchanged = 0;
647+
break;
648+
}
649+
}
650+
if (unchanged) {
651+
return Py_NewRef(self);
652+
}
653+
654+
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
655+
PyObject *res = PyUnicode_New(len, maxchar);
656+
if (!res)
657+
return NULL;
658+
int res_kind = PyUnicode_KIND(res);
659+
void *res_data = PyUnicode_DATA(res);
660+
661+
for (Py_ssize_t i = 0; i < len; i++) {
662+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
663+
Py_UCS4 upper = toupper_ucs4(ch);
664+
PyUnicode_WRITE(res_kind, res_data, i, upper);
665+
}
666+
return res;
667+
}

mypyc/primitives/str_ops.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,21 @@
428428
c_function_name="CPyStr_Ord",
429429
error_kind=ERR_MAGIC,
430430
)
431+
432+
# str.lower() -> str, implemented by the C helper CPyStr_Lower.
# The helper returns NULL on failure, which is reported via ERR_MAGIC.
method_op(
    name="lower",
    arg_types=[str_rprimitive],
    return_type=str_rprimitive,
    c_function_name="CPyStr_Lower",
    error_kind=ERR_MAGIC,
)
440+
441+
# str.upper() -> str, implemented by the C helper CPyStr_Upper.
# The helper returns NULL on failure, which is reported via ERR_MAGIC.
method_op(
    name="upper",
    arg_types=[str_rprimitive],
    return_type=str_rprimitive,
    c_function_name="CPyStr_Upper",
    error_kind=ERR_MAGIC,
)

mypyc/test-data/fixtures/ir.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ def lstrip(self, item: Optional[str] = None) -> str: pass
112112
def rstrip(self, item: Optional[str] = None) -> str: pass
113113
def join(self, x: Iterable[str]) -> str: pass
114114
def format(self, *args: Any, **kwargs: Any) -> str: ...
115-
def upper(self) -> str: ...
116115
def startswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
117116
def endswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
118117
def replace(self, old: str, new: str, maxcount: int=...) -> str: ...
@@ -122,6 +121,8 @@ def rpartition(self, sep: str, /) -> Tuple[str, str, str]: ...
122121
def removeprefix(self, prefix: str, /) -> str: ...
123122
def removesuffix(self, suffix: str, /) -> str: ...
124123
def islower(self) -> bool: ...
124+
def lower(self) -> str: ...
125+
def upper(self) -> str: ...
125126

126127
class float:
127128
def __init__(self, x: object) -> None: pass

mypyc/test-data/irbuild-str.test

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,3 +562,24 @@ L0:
562562
r3 = box(native_int, r1)
563563
r4 = unbox(int, r3)
564564
return r4
565+
566+
[case testLower]
567+
def do_lower(s: str) -> str:
568+
return s.lower()
569+
[out]
570+
def do_lower(s):
571+
s, r0 :: str
572+
L0:
573+
r0 = CPyStr_Lower(s)
574+
return r0
575+
576+
[case testUpper]
577+
def do_upper(s: str) -> str:
578+
return s.upper()
579+
[out]
580+
def do_upper(s):
581+
s, r0 :: str
582+
L0:
583+
r0 = CPyStr_Upper(s)
584+
return r0
585+

mypyc/test-data/run-strings.test

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,3 +906,21 @@ def test_count_multi_start_end_emoji() -> None:
906906
assert string.count("😴😴😴", 0, 12) == 1, string.count("😴😴😴", 0, 12)
907907
assert string.count("🚀🚀🚀", 0, 12) == 2, string.count("🚀🚀🚀", 0, 12)
908908
assert string.count("ñññ", 0, 12) == 1, string.count("ñññ", 0, 12)
909+
910+
[case testLower]
911+
def test_str_lower() -> None:
    """Check str.lower() on ASCII, Latin-1, wide and special-cased input."""
    assert "".lower() == ""
    assert "ABC".lower() == "abc"
    assert "abc".lower() == "abc"
    assert "AbC123".lower() == "abc123"
    assert "áÉÍ".lower() == "áéí"
    assert "😴🚀".lower() == "😴🚀"
    # One code point expands to two: LATIN CAPITAL LETTER I WITH DOT ABOVE.
    assert "\u0130".lower() == "i\u0307"
    assert len("\u0130".lower()) == 2
    # Result is narrower than the input: KELVIN SIGN lowercases to ASCII "k".
    assert "\u212a".lower() == "k"
    assert "\u212aM".lower() == "km"
    # Capital sigma is context dependent: final form at the end of a word.
    assert "\u0391\u03a3".lower() == "\u03b1\u03c2"
    assert "\u03a3\u0391".lower() == "\u03c3\u03b1"
918+
919+
[case testUpper]
920+
def test_str_upper() -> None:
    """Check str.upper() on ASCII, Latin-1, wide and special-cased input."""
    assert "".upper() == ""
    assert "abc".upper() == "ABC"
    assert "ABC".upper() == "ABC"
    assert "AbC123".upper() == "ABC123"
    assert "áéí".upper() == "ÁÉÍ"
    assert "😴🚀".upper() == "😴🚀"
    # One code point expands to two: sharp s and the "fi" ligature.
    assert "\u00df".upper() == "SS"
    assert "stra\u00dfe".upper() == "STRASSE"
    assert "\ufb01".upper() == "FI"
    # Result is wider than the input: these Latin-1 characters uppercase to
    # code points above U+00FF.
    assert "\u00ff".upper() == "\u0178"
    assert "\u00b5".upper() == "\u039c"
    assert "a\u00ffb".upper() == "A\u0178B"

0 commit comments

Comments
 (0)