Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions data/data_generator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ function print_c_data_tables(io, sequences, prop_page_indices, prop_pages, dedup
end
print(io, "};\n\n")

print(io, "static const utf8proc_uint32_t utf8proc_combinations_second[] = {\n")
print(io, "static const utf8proc_int32_t utf8proc_combinations_second[] = {\n")
for dm0 in sort!(collect(keys(comb_mapping)))
print(io, " ");
for dm1 in sort!(collect(keys(comb_mapping[dm0])))
Expand All @@ -479,7 +479,7 @@ function print_c_data_tables(io, sequences, prop_page_indices, prop_pages, dedup
end
print(io, "};\n\n")

print(io, "static const utf8proc_uint32_t utf8proc_combinations_combined[] = {\n")
print(io, "static const utf8proc_int32_t utf8proc_combinations_combined[] = {\n")
for dm0 in sort!(collect(keys(comb_mapping)))
print(io, " ");
for dm1 in sort!(collect(keys(comb_mapping[dm0])))
Expand Down
13 changes: 6 additions & 7 deletions utf8proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -646,14 +646,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
}
if (options & UTF8PROC_COMPOSE) {
utf8proc_int32_t *starter = NULL;
utf8proc_int32_t current_char;
const utf8proc_property_t *starter_property = NULL, *current_property;
const utf8proc_property_t *starter_property = NULL;
utf8proc_propval_t max_combining_class = -1;
utf8proc_ssize_t rpos;
utf8proc_ssize_t wpos = 0;
for (rpos = 0; rpos < length; rpos++) {
current_char = buffer[rpos];
current_property = unsafe_get_property(current_char);
utf8proc_int32_t current_char = buffer[rpos];
const utf8proc_property_t *current_property = unsafe_get_property(current_char);
if (starter && current_property->combining_class > max_combining_class) {
/* combination perhaps possible */
utf8proc_int32_t hangul_lindex;
Expand Down Expand Up @@ -687,18 +686,18 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
int idx = starter_property->comb_index;
if (idx < 0x3FF && current_property->comb_issecond) {
int len = starter_property->comb_length;
utf8proc_uint32_t max_second = utf8proc_combinations_second[idx + len - 1];
utf8proc_int32_t max_second = utf8proc_combinations_second[idx + len - 1];
if (current_char <= max_second) {
// TODO: binary search? arithmetic search?
for (int off = 0; off < len; ++off) {
utf8proc_uint32_t second = utf8proc_combinations_second[idx + off];
utf8proc_int32_t second = utf8proc_combinations_second[idx + off];
if (current_char < second) {
/* not found */
break;
}
if (current_char == second) {
/* found */
utf8proc_uint32_t composition = utf8proc_combinations_combined[idx + off];
utf8proc_int32_t composition = utf8proc_combinations_combined[idx + off];
*starter = composition;
starter_property = NULL;
break;
Expand Down
4 changes: 2 additions & 2 deletions utf8proc_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -16316,7 +16316,7 @@ static const utf8proc_property_t utf8proc_properties[] = {
{UTF8PROC_CATEGORY_CF, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, 1023, 0, false, false, false, true, true, 0, false, 0, UTF8PROC_BOUNDCLASS_EXTEND, UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND},
};

static const utf8proc_uint32_t utf8proc_combinations_second[] = {
static const utf8proc_int32_t utf8proc_combinations_second[] = {
824,
824,
824,
Expand Down Expand Up @@ -16710,7 +16710,7 @@ static const utf8proc_uint32_t utf8proc_combinations_second[] = {
93543,
};

static const utf8proc_uint32_t utf8proc_combinations_combined[] = {
static const utf8proc_int32_t utf8proc_combinations_combined[] = {
8814,
8800,
8815,
Expand Down
Loading