@@ -546,3 +546,122 @@ CPyTagged CPyStr_Ord(PyObject *obj) {
546546 PyExc_TypeError , "ord() expected a character, but a string of length %zd found" , s );
547547 return CPY_INT_TAG ;
548548}
549+
// ASCII lowercase lookup table, indexed by code point (0x00-0x7F).
// Every entry maps to itself except 'A'-'Z' (0x41-0x5A), which map to
// 'a'-'z' (0x61-0x7A). One row per 16 code points.
static const unsigned char ascii_lower_table[128] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
               0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
               0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
561+
// ASCII uppercase lookup table, indexed by code point (0x00-0x7F).
// Every entry maps to itself except 'a'-'z' (0x61-0x7A), which map to
// 'A'-'Z' (0x41-0x5A). One row per 16 code points.
static const unsigned char ascii_upper_table[128] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
               0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
               0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x70 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
572+
573+ // Helper for lower/upper: get the lower/upper code point for a character
574+ static inline Py_UCS4 tolower_ucs4 (Py_UCS4 ch ) {
575+ if (ch < 128 ) {
576+ return ascii_lower_table [ch ];
577+ }
578+ #ifdef Py_UNICODE_TOLOWER
579+ return Py_UNICODE_TOLOWER (ch );
580+ #else
581+ // fallback: no-op for non-ASCII if macro is unavailable
582+ return ch ;
583+ #endif
584+ }
585+
586+ static inline Py_UCS4 toupper_ucs4 (Py_UCS4 ch ) {
587+ if (ch < 128 ) {
588+ return ascii_upper_table [ch ];
589+ }
590+ #ifdef Py_UNICODE_TOUPPER
591+ return Py_UNICODE_TOUPPER (ch );
592+ #else
593+ // fallback: no-op for non-ASCII if macro is unavailable
594+ return ch ;
595+ #endif
596+ }
597+
598+ // Implementation of s.lower()
599+ PyObject * CPyStr_Lower (PyObject * self ) {
600+ if (PyUnicode_READY (self ) == -1 )
601+ return NULL ;
602+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
603+ int kind = PyUnicode_KIND (self );
604+ void * data = PyUnicode_DATA (self );
605+
606+ // Fast path: check if already all lower
607+ int unchanged = 1 ;
608+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
609+ Py_UCS4 ch = PyUnicode_READ (kind , data , i );
610+ if (tolower_ucs4 (ch ) != ch ) {
611+ unchanged = 0 ;
612+ break ;
613+ }
614+ }
615+ if (unchanged ) {
616+ return Py_NewRef (self );
617+ }
618+
619+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (self );
620+ PyObject * res = PyUnicode_New (len , maxchar );
621+ if (!res )
622+ return NULL ;
623+ int res_kind = PyUnicode_KIND (res );
624+ void * res_data = PyUnicode_DATA (res );
625+
626+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
627+ Py_UCS4 ch = PyUnicode_READ (kind , data , i );
628+ Py_UCS4 lower = tolower_ucs4 (ch );
629+ PyUnicode_WRITE (res_kind , res_data , i , lower );
630+ }
631+ return res ;
632+ }
633+
634+ // Implementation of s.upper()
635+ PyObject * CPyStr_Upper (PyObject * self ) {
636+ if (PyUnicode_READY (self ) == -1 )
637+ return NULL ;
638+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
639+ int kind = PyUnicode_KIND (self );
640+ void * data = PyUnicode_DATA (self );
641+
642+ int unchanged = 1 ;
643+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
644+ Py_UCS4 ch = PyUnicode_READ (kind , data , i );
645+ if (toupper_ucs4 (ch ) != ch ) {
646+ unchanged = 0 ;
647+ break ;
648+ }
649+ }
650+ if (unchanged ) {
651+ return Py_NewRef (self );
652+ }
653+
654+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (self );
655+ PyObject * res = PyUnicode_New (len , maxchar );
656+ if (!res )
657+ return NULL ;
658+ int res_kind = PyUnicode_KIND (res );
659+ void * res_data = PyUnicode_DATA (res );
660+
661+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
662+ Py_UCS4 ch = PyUnicode_READ (kind , data , i );
663+ Py_UCS4 upper = toupper_ucs4 (ch );
664+ PyUnicode_WRITE (res_kind , res_data , i , upper );
665+ }
666+ return res ;
667+ }
0 commit comments