Skip to content

Commit 958a224

Browse files
committed
Work around sysimage compilation bug with unicode normalization
When using the code from the Unicode stdlib directly, there seems to be something funky going on with @cfunction, generic dispatch and sysimage generation (see JuliaLang/julia#45716). Here I hard-code the charmap to avoid this!
1 parent 3ab8589 commit 958a224

File tree

1 file changed

+18
-20
lines changed

1 file changed

+18
-20
lines changed

src/value_parsing.jl

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -210,41 +210,39 @@ end
210210
# stdlib under the name `Unicode.julia_chartransform`. See
211211
# https://github.com/JuliaLang/julia/pull/42561
212212
#
213-
# To allow use on older Julia versions, we reproduce that logic here.
213+
# To allow use on older Julia versions and to work around the bug
214+
# https://github.com/JuliaLang/julia/issues/45716
215+
# we reproduce a specialized version of that logic here.
214216

215217
# static wrapper around user callback function
216-
utf8proc_custom_func(codepoint::UInt32, callback::Any) =
217-
UInt32(callback(codepoint))::UInt32
218+
function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32
219+
(codepoint == 0x025B ? 0x03B5 :
220+
codepoint == 0x00B5 ? 0x03BC :
221+
codepoint == 0x00B7 ? 0x22C5 :
222+
codepoint == 0x0387 ? 0x22C5 :
223+
codepoint == 0x2212 ? 0x002D :
224+
codepoint)
225+
end
218226

219-
function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T
220-
ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}),
227+
function utf8proc_decompose(str, options, buffer, nwords)
228+
ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ptr{Cvoid}),
221229
str, sizeof(str), buffer, nwords, options,
222-
@cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform)
230+
@cfunction(utf8proc_custom_func, UInt32, (UInt32, Ptr{Cvoid})), C_NULL)
223231
ret < 0 && utf8proc_error(ret)
224232
return ret
225233
end
226234

227-
function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity)
228-
nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
235+
function utf8proc_map(str::Union{String,SubString{String}}, options::Integer)
236+
nwords = utf8proc_decompose(str, options, C_NULL, 0)
229237
buffer = Base.StringVector(nwords*4)
230-
nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
238+
nwords = utf8proc_decompose(str, options, buffer, nwords)
231239
nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
232240
nbytes < 0 && utf8proc_error(nbytes)
233241
return String(resize!(buffer, nbytes))
234242
end
235243

236-
const _julia_charmap = Dict{UInt32,UInt32}(
237-
0x025B => 0x03B5,
238-
0x00B5 => 0x03BC,
239-
0x00B7 => 0x22C5,
240-
0x0387 => 0x22C5,
241-
0x2212 => 0x002D,
242-
)
243-
244-
julia_chartransform(codepoint::UInt32) = get(_julia_charmap, codepoint, codepoint)
245-
246244
function normalize_identifier(str)
247245
flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE
248-
utf8proc_map(str, flags, julia_chartransform)
246+
utf8proc_map(str, flags)
249247
end
250248

0 commit comments

Comments
 (0)