Skip to content

Commit 761a7a9

Browse files
committed
Delete unused UTF-8 related functions
1 parent 70c7207 commit 761a7a9

File tree

3 files changed

+0
-59
lines changed

3 files changed

+0
-59
lines changed

include/rbs/string.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,4 @@ size_t rbs_string_len(const rbs_string_t self);
44 44
*/
45 45
bool rbs_string_equal(const rbs_string_t lhs, const rbs_string_t rhs);
46 46

47-
unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string);
48-
49 47
#endif

src/string.c

Lines changed: 0 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1,59 +1,10 @@
1 1
#include "rbs/string.h"
2-
#include "rbs/defines.h"
3 2

4 3
#include <stdlib.h>
5 4
#include <string.h>
6 5
#include <stdio.h>
7 6
#include <ctype.h>
8 7

9-
unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
10-
unsigned int codepoint = 0;
11-
int remaining_bytes = 0;
12-
13-
const char *s = string.start;
14-
const char *end = string.end;
15-
16-
if (s >= end) return 0; // End of string
17-
18-
if (RBS_LIKELY((*s & 0x80) == 0)) {
19-
// Single byte character (0xxxxxxx)
20-
return *s;
21-
} else if ((*s & 0xE0) == 0xC0) {
22-
// Two byte character (110xxxxx 10xxxxxx)
23-
codepoint = *s & 0x1F;
24-
remaining_bytes = 1;
25-
} else if ((*s & 0xF0) == 0xE0) {
26-
// Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
27-
codepoint = *s & 0x0F;
28-
remaining_bytes = 2;
29-
} else if ((*s & 0xF8) == 0xF0) {
30-
// Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
31-
codepoint = *s & 0x07;
32-
remaining_bytes = 3;
33-
} else {
34-
// Invalid UTF-8 sequence
35-
return 0xFFFD; // Unicode replacement character
36-
}
37-
38-
s++;
39-
while (remaining_bytes > 0 && s < end) {
40-
if ((*s & 0xC0) != 0x80) {
41-
// Invalid continuation byte
42-
return 0xFFFD;
43-
}
44-
codepoint = (codepoint << 6) | (*s & 0x3F);
45-
s++;
46-
remaining_bytes--;
47-
}
48-
49-
if (remaining_bytes > 0) {
50-
// Incomplete sequence
51-
return 0xFFFD;
52-
}
53-
54-
return codepoint;
55-
}
56-
57 8
rbs_string_t rbs_string_new(const char *start, const char *end) {
58 9
return (rbs_string_t) {
59 10
.start = start,

src/util/rbs_unescape.c

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,6 @@ static int octal_to_int(const char *octal, int length) {
43 43
return result;
44 44
}
45 45

46-
int rbs_utf8_codelen(unsigned int c) {
47-
if (c <= 0x7F) return 1;
48-
if (c <= 0x7FF) return 2;
49-
if (c <= 0xFFFF) return 3;
50-
if (c <= 0x10FFFF) return 4;
51-
return 1; // Invalid Unicode codepoint, treat as 1 byte
52-
}
53-
54 46
// Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
55 47
// Returns the number of bytes written, or 0 when the output is not changed.
56 48
//

0 commit comments

Comments
 (0)