Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/include/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
#define HERB_UTF8_H

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

int utf8_char_byte_length(unsigned char first_byte);
int utf8_sequence_length(const char* str, size_t position, size_t max_length);
uint32_t utf8_char_byte_length(unsigned char first_byte);
uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length);
bool utf8_is_valid_continuation_byte(unsigned char byte);

#endif
8 changes: 4 additions & 4 deletions src/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// 110xxxxx = 2 bytes
// 1110xxxx = 3 bytes
// 11110xxx = 4 bytes
int utf8_char_byte_length(unsigned char first_byte) {
uint32_t utf8_char_byte_length(unsigned char first_byte) {
if ((first_byte & 0x80) == 0) {
return 1;
} else if ((first_byte & 0xE0) == 0xC0) {
Expand All @@ -24,18 +24,18 @@ bool utf8_is_valid_continuation_byte(unsigned char byte) {
return (byte & 0xC0) == 0x80;
}

int utf8_sequence_length(const char* str, size_t position, size_t max_length) {
uint32_t utf8_sequence_length(const char* str, size_t position, size_t max_length) {
if (position >= max_length) { return 0; }

unsigned char first_byte = (unsigned char) str[position];
int expected_length = utf8_char_byte_length(first_byte);
uint32_t expected_length = utf8_char_byte_length(first_byte);

if (position + expected_length > max_length) {
return 1; // Not enough bytes, treat as single byte
}

if (expected_length > 1) {
for (int i = 1; i < expected_length; i++) {
for (uint32_t i = 1; i < expected_length; i++) {
if (!utf8_is_valid_continuation_byte((unsigned char) str[position + i])) {
return 1; // Invalid continuation byte, treat first byte as single byte
}
Expand Down
Loading