1
- # License is MIT: https://github.com/JuliaString/LaTeX_Entities /LICENSE.md
1
+ # License is MIT: https://github.com/JuliaString/HTML_Entities /LICENSE.md
2
2
#
3
3
# Mapping from HTML entities to the corresponding Unicode codepoint.
4
4
@@ -8,7 +8,9 @@ using StrTables
8
8
9
9
VER = UInt32 (1 )
10
10
11
- include (" htmlnames.jl" )
11
+ const inpname = " htmlnames.jl"
12
+
13
+ include (inpname)
12
14
13
15
const disp = [false ]
14
16
@@ -17,62 +19,50 @@ const datapath = joinpath(Pkg.dir(), "HTML_Entities", "data")
17
19
18
20
const empty_str = " "
19
21
20
- function sortsplit! {T} (index:: Vector{UInt16} , vec:: Vector{Tuple{T, UInt16}} , base)
21
- sort! (vec)
22
- len = length (vec)
23
- valvec = Vector {T} (len)
24
- indvec = Vector {UInt16} (len)
25
- for (i, val) in enumerate (vec)
26
- valvec[i], ind = val
27
- indvec[i] = ind
28
- index[ind] = UInt16 (base + i)
29
- end
30
- base += len
31
- valvec, indvec, base
32
- end
33
-
34
22
function make_tables ()
35
- symnam = Vector { String} ()
36
- symval = Vector {String} ()
23
+ symnam = String[]
24
+ symval = Vector{UInt32}[]
37
25
38
- for pair in htmlonechar
39
- push! (symnam, pair[ 1 ] )
40
- push! (symval, string ( Char (pair[ 2 ])) )
26
+ for (nam, val) in htmlonechar
27
+ push! (symnam, nam )
28
+ push! (symval, [val] )
41
29
end
42
- for pair in htmlnonbmp
43
- push! (symnam, pair[ 1 ] )
44
- push! (symval, string ( Char ( 0x10000 + pair[ 2 ])) )
30
+ for (nam, val) in htmlnonbmp
31
+ push! (symnam, nam )
32
+ push! (symval, [ 0x10000 + val] )
45
33
end
46
- for pair in htmltwochar
47
- push! (symnam, pair[1 ])
48
- p = pair[2 ]
49
- push! (symval, string (Char (p[1 ]), Char (p[2 ])))
34
+ for (nam, val) in htmltwochar
35
+ push! (symnam, nam)
36
+ push! (symval, UInt32[val... ])
50
37
end
51
38
52
39
# We want to build a table of all the names, sort them, then create a StrTable out of them
53
40
srtnam = sortperm (symnam)
54
41
srtval = symval[srtnam] # Values, sorted the same as srtnam
55
42
56
43
# BMP characters
57
- l16 = Vector { Tuple{UInt16, UInt16}} ()
44
+ l16 = Tuple{UInt16, UInt16}[]
58
45
# non-BMP characters (in range 0x10000 - 0x1ffff)
59
- l32 = Vector { Tuple{UInt16, UInt16}} ()
46
+ l32 = Tuple{UInt16, UInt16}[]
60
47
# two characters packed into UInt32, first character in high 16-bits
61
- l2c = Vector { Tuple{UInt32, UInt16}} ()
48
+ l2c = Tuple{UInt32, UInt16}[]
62
49
63
50
for i in eachindex (srtnam)
64
- chrs = convert (Vector{Char}, srtval[i])
65
- length (chrs) > 2 && error (" Too long sequence of characters $chrs " )
66
- if length (chrs) == 2
67
- (chrs[1 ] > ' \u ffff' || chrs[2 ] > ' \u ffff' ) &&
68
- error (" Character $(chrs[1 ]) or $(chrs[2 ]) > 0xffff" )
69
- push! (l2c, (chrs[1 ]% UInt32<< 16 | chrs[2 ]% UInt32, i))
70
- elseif chrs[1 ] > ' \U 1ffff'
71
- error (" Character $(chrs[1 ]) too large: $(UInt32 (chrs[1 ])) " )
72
- elseif chrs[1 ] > ' \u ffff'
73
- push! (l32, ((chrs[1 ]- 0x10000 )% UInt32, i))
51
+ chrs = srtval[i]
52
+ len = length (chrs)
53
+ len > 2 && error (" Too long sequence of characters $chrs " )
54
+ ch1 = chrs[1 ]
55
+ if len == 2
56
+ ch2 = chrs[end ]
57
+ (ch1 > 0x0ffff || ch2 > 0x0ffff ) &&
58
+ error (" Character $ch1 or $ch2 > 0xffff" )
59
+ push! (l2c, (ch1<< 16 | ch2, i))
60
+ elseif ch1 > 0x1ffff
61
+ error (" Character $ch1 too large" )
62
+ elseif ch1 > 0x0ffff
63
+ push! (l32, (ch1% UInt16, i))
74
64
else
75
- push! (l16, (chrs[ 1 ] % UInt16, i))
65
+ push! (l16, (ch1 % UInt16, i))
76
66
end
77
67
end
78
68
@@ -84,18 +74,24 @@ function make_tables()
84
74
# in each table to the index into the name table (so that we can find multiple names for
85
75
# each character)
86
76
87
- indvec = Vector {UInt16} ( length (srtnam))
77
+ indvec = create_vector (UInt16, length (srtnam))
88
78
vec16, ind16, base32 = sortsplit! (indvec, l16, 0 )
89
79
vec32, ind32, base2c = sortsplit! (indvec, l32, base32)
90
80
vec2c, ind2c, basefn = sortsplit! (indvec, l2c, base2c)
91
81
92
- (VER, string (now ()), " loaded from htmlnames.jl " ,
82
+ (VER, string (now ()), " loaded from $inpname " ,
93
83
base32% UInt32, base2c% UInt32, StrTable (symnam[srtnam]), indvec,
94
84
vec16, ind16, vec32, ind32, vec2c, ind2c)
95
85
end
96
86
97
87
println (" Creating tables" )
98
- tup = make_tables ()
88
+ tup = nothing
89
+ try
90
+ global tup
91
+ tup = make_tables ()
92
+ catch ex
93
+ println (sprint (showerror, ex, catch_backtrace ()))
94
+ end
99
95
savfile = joinpath (datapath, fname)
100
96
println (" Saving tables to " , savfile)
101
97
StrTables. save (savfile, tup)
0 commit comments