@@ -2955,6 +2955,77 @@ lxb_encoding_decode_valid_utf_8_single(const lxb_char_t **data,
29552955 return cp ;
29562956}
29572957
2958+ lxb_codepoint_t
2959+ lxb_encoding_decode_valid_utf_8_single_reverse (const lxb_char_t * * end ,
2960+ const lxb_char_t * begin )
2961+ {
2962+ lxb_codepoint_t cp ;
2963+ const lxb_char_t * p = * end ;
2964+
2965+ while (p > begin ) {
2966+ p -= 1 ;
2967+
2968+ if (* p < 0x80 ){
2969+ cp = (lxb_codepoint_t ) * p ;
2970+
2971+ (* end ) = p ;
2972+ return cp ;
2973+ }
2974+ else if ((* p & 0xe0 ) == 0xc0 ) {
2975+ /* 110xxxxx 10xxxxxx */
2976+
2977+ if (* end - p < 2 ) {
2978+ * end = p ;
2979+ return LXB_ENCODING_DECODE_ERROR ;
2980+ }
2981+
2982+ cp = (p [0 ] ^ (0xC0 & p [0 ])) << 6 ;
2983+ cp |= (p [1 ] ^ (0x80 & p [1 ]));
2984+
2985+ (* end ) = p ;
2986+ return cp ;
2987+ }
2988+ else if ((* p & 0xf0 ) == 0xe0 ) {
2989+ /* 1110xxxx 10xxxxxx 10xxxxxx */
2990+
2991+ if (* end - p < 3 ) {
2992+ * end = p ;
2993+ return LXB_ENCODING_DECODE_ERROR ;
2994+ }
2995+
2996+ cp = (p [0 ] ^ (0xE0 & p [0 ])) << 12 ;
2997+ cp |= (p [1 ] ^ (0x80 & p [1 ])) << 6 ;
2998+ cp |= (p [2 ] ^ (0x80 & p [2 ]));
2999+
3000+ (* end ) = p ;
3001+ return cp ;
3002+ }
3003+ else if ((* p & 0xf8 ) == 0xf0 ) {
3004+ /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
3005+
3006+ if (* end - p < 4 ) {
3007+ * end = p ;
3008+ return LXB_ENCODING_DECODE_ERROR ;
3009+ }
3010+
3011+ cp = (p [0 ] ^ (0xF0 & p [0 ])) << 18 ;
3012+ cp |= (p [1 ] ^ (0x80 & p [1 ])) << 12 ;
3013+ cp |= (p [2 ] ^ (0x80 & p [2 ])) << 6 ;
3014+ cp |= (p [3 ] ^ (0x80 & p [3 ]));
3015+
3016+ (* end ) = p ;
3017+ return cp ;
3018+ }
3019+ else if (* end - p >= 4 ) {
3020+ break ;
3021+ }
3022+ }
3023+
3024+ * end = p ;
3025+
3026+ return LXB_ENCODING_DECODE_ERROR ;
3027+ }
3028+
29583029uint8_t
29593030lxb_encoding_decode_utf_8_length (lxb_char_t data )
29603031{
0 commit comments