File tree Expand file tree Collapse file tree 3 files changed +0
-59
lines changed
Expand file tree Collapse file tree 3 files changed +0
-59
lines changed Original file line number Diff line number Diff line change @@ -44,6 +44,4 @@ size_t rbs_string_len(const rbs_string_t self);
4444 */
4545bool rbs_string_equal (const rbs_string_t lhs , const rbs_string_t rhs );
4646
47- unsigned int rbs_utf8_string_to_codepoint (const rbs_string_t string );
48-
4947#endif
Original file line number Diff line number Diff line change 11#include "rbs/string.h"
2- #include "rbs/defines.h"
32
43#include <stdlib.h>
54#include <string.h>
65#include <stdio.h>
76#include <ctype.h>
87
9- unsigned int rbs_utf8_string_to_codepoint (const rbs_string_t string ) {
10- unsigned int codepoint = 0 ;
11- int remaining_bytes = 0 ;
12-
13- const char * s = string .start ;
14- const char * end = string .end ;
15-
16- if (s >= end ) return 0 ; // End of string
17-
18- if (RBS_LIKELY ((* s & 0x80 ) == 0 )) {
19- // Single byte character (0xxxxxxx)
20- return * s ;
21- } else if ((* s & 0xE0 ) == 0xC0 ) {
22- // Two byte character (110xxxxx 10xxxxxx)
23- codepoint = * s & 0x1F ;
24- remaining_bytes = 1 ;
25- } else if ((* s & 0xF0 ) == 0xE0 ) {
26- // Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
27- codepoint = * s & 0x0F ;
28- remaining_bytes = 2 ;
29- } else if ((* s & 0xF8 ) == 0xF0 ) {
30- // Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
31- codepoint = * s & 0x07 ;
32- remaining_bytes = 3 ;
33- } else {
34- // Invalid UTF-8 sequence
35- return 0xFFFD ; // Unicode replacement character
36- }
37-
38- s ++ ;
39- while (remaining_bytes > 0 && s < end ) {
40- if ((* s & 0xC0 ) != 0x80 ) {
41- // Invalid continuation byte
42- return 0xFFFD ;
43- }
44- codepoint = (codepoint << 6 ) | (* s & 0x3F );
45- s ++ ;
46- remaining_bytes -- ;
47- }
48-
49- if (remaining_bytes > 0 ) {
50- // Incomplete sequence
51- return 0xFFFD ;
52- }
53-
54- return codepoint ;
55- }
56-
578rbs_string_t rbs_string_new (const char * start , const char * end ) {
589 return (rbs_string_t ) {
5910 .start = start ,
Original file line number Diff line number Diff line change @@ -43,14 +43,6 @@ static int octal_to_int(const char *octal, int length) {
4343 return result ;
4444}
4545
/**
 * Reports how many bytes the UTF-8 encoding of codepoint `c` occupies.
 *
 * @param c The codepoint to measure.
 * @return 1 for c <= 0x7F, 2 for c <= 0x7FF, 3 for c <= 0xFFFF, 4 for c <= 0x10FFFF.
 *         Values above 0x10FFFF are not valid codepoints and are reported as 1 byte.
 */
int rbs_utf8_codelen(unsigned int c) {
    int byte_count;

    // Walk the thresholds from the top of the range downwards.
    if (c > 0x10FFFF) {
        byte_count = 1; // Invalid Unicode codepoint, treat as 1 byte
    } else if (c > 0xFFFF) {
        byte_count = 4;
    } else if (c > 0x7FF) {
        byte_count = 3;
    } else if (c > 0x7F) {
        byte_count = 2;
    } else {
        byte_count = 1;
    }

    return byte_count;
}
53-
5446// Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
5547// Returns the number of bytes written, or 0 when the output is not changed.
5648//
You can’t perform that action at this time.
0 commit comments