Skip to content

Commit 93ae320

Browse files
committed
Correct handling exclusions
1 parent b18c5b5 commit 93ae320

File tree

4 files changed

+3736
-2842
lines changed

4 files changed

+3736
-2842
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ include (utils.cmake)
44

55
disallow_intree_builds()
66

7-
project (utf8proc VERSION 2.9.0 LANGUAGES C)
7+
project (utf8proc VERSION 2.10.0 LANGUAGES C)
88

99
# This is the ABI version number, which may differ from the
1010
# API version number (defined in utf8proc.h and above).

data/data_generator.jl

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,11 @@ end
259259
comb_mapping = Dict{UInt32, Dict{UInt32, UInt32}}()
260260
comb_issecond = Set{UInt32}()
261261
for char in char_props
262+
# What happens with decompositions that are longer than 2?
262263
if isnothing(char.decomp_type) && !isnothing(char.decomp_mapping) &&
263264
length(char.decomp_mapping) == 2 && !isnothing(char_hash[char.decomp_mapping[1]]) &&
264265
char_hash[char.decomp_mapping[1]].combining_class == 0 &&
265-
char.code ∉ exclusions
266+
(char.code ∉ exclusions && char.code ∉ excl_version)
266267
dm0 = char.decomp_mapping[1]
267268
dm1 = char.decomp_mapping[2]
268269
if !haskey(comb_mapping, dm0)
@@ -285,70 +286,6 @@ let
285286
end
286287
end
287288

288-
# comb1st_indices = Dict{UInt32,Int}()
289-
# comb1st_indices_sorted_keys = Origin(0)(UInt32[])
290-
# comb2nd_indices = Dict{UInt32,Int}()
291-
# comb2nd_indices_sorted_keys = Origin(0)(UInt32[])
292-
# comb2nd_indices_length(code::UInt32) = code < 0x8000 ? 1 : 2
293-
# comb_array = Origin(0)(Vector{Dict{Int,UInt32}}())
294-
# for (i,char) in enumerate(char_props)
295-
# if isnothing(char.decomp_type) && !isnothing(char.decomp_mapping) &&
296-
# length(char.decomp_mapping) == 2 && !isnothing(char_hash[char.decomp_mapping[1]]) &&
297-
# char_hash[char.decomp_mapping[1]].combining_class == 0 &&
298-
# char.code ∉ exclusions
299-
# dm0 = char.decomp_mapping[1]
300-
# dm1 = char.decomp_mapping[2]
301-
# if !haskey(comb1st_indices, dm0)
302-
# comb1st_indices[dm0] = length(comb1st_indices)
303-
# push!(comb1st_indices_sorted_keys, dm0)
304-
# push!(comb_array, Dict{Int,UInt32}())
305-
# @assert length(comb1st_indices) == length(comb_array)
306-
# end
307-
# if !haskey(comb2nd_indices, dm1)
308-
# push!(comb2nd_indices_sorted_keys, dm1)
309-
# comb2nd_indices[dm1] = length(comb2nd_indices)
310-
# end
311-
# @assert !haskey(comb_array[comb1st_indices[dm0]], comb2nd_indices[dm1])
312-
# comb_array[comb1st_indices[dm0]][comb2nd_indices[dm1]] = char.code
313-
# end
314-
# end
315-
#
316-
# comb_indices = Dict{UInt32,Int}()
317-
# comb1st_indices_lastoffsets = Origin(0)(zeros(Int, length(comb1st_indices)))
318-
# comb1st_indices_firstoffsets = Origin(0)(zeros(Int, length(comb1st_indices)))
319-
# let
320-
# cumoffset = 0
321-
# for dm0 in comb1st_indices_sorted_keys
322-
# index = comb1st_indices[dm0]
323-
# first = nothing
324-
# last = nothing
325-
# offset = 0
326-
# for b in eachindex(comb2nd_indices_sorted_keys)
327-
# dm1 = comb2nd_indices_sorted_keys[b]
328-
# if haskey(comb_array[index], b)
329-
# if isnothing(first)
330-
# first = offset
331-
# end
332-
# last = offset + comb2nd_indices_length(dm1) - 1
333-
# end
334-
# offset += comb2nd_indices_length(dm1)
335-
# end
336-
# comb1st_indices_firstoffsets[index] = first
337-
# comb1st_indices_lastoffsets[index] = last
338-
# @assert !haskey(comb_indices, dm0)
339-
# comb_indices[dm0] = 0x4000 | cumoffset
340-
# cumoffset += last - first + 1 + 2
341-
# end
342-
#
343-
# offset = 0
344-
# for dm1 in comb2nd_indices_sorted_keys
345-
# @assert !haskey(comb_indices, dm1)
346-
# comb_indices[dm1] = 0x8000 | (comb2nd_indices[dm1] + offset)
347-
# @assert comb2nd_indices[dm1] + offset < 0x4000
348-
# offset += comb2nd_indices_length(dm1) - 1
349-
# end
350-
# end
351-
352289
# Convert a sequence of UTF-32 code units into the equivalent UTF-16 code units,
# going through an intermediate `String`.
function utf16_encode(utf32_seq)
    as_string = transcode(String, utf32_seq)
    return transcode(UInt16, as_string)
end
353290

354291
# Utility for packing all UTF-16 encoded sequences into one big array
@@ -536,7 +473,7 @@ function print_c_data_tables(io, sequences, prop_page_indices, prop_pages, dedup
536473
for dm0 in sort!(collect(keys(comb_mapping)))
537474
for dm1 in sort!(collect(keys(comb_mapping[dm0])))
538475
code = comb_mapping[dm0][dm1]
539-
print(io, " { ", dm1, ", ", code, " },\n")
476+
print(io, " {", dm1, ", ", code, "},\n")
540477
end
541478
end
542479
print(io, "};\n\n")

0 commit comments

Comments
 (0)