Skip to content

Commit 3431334

Browse files
authored
Merge pull request #11 from JuliaString/spj/updatetab
Update string table code
2 parents 89e6743 + e4dab78 commit 3431334

File tree

1 file changed

+23
-37
lines changed

1 file changed

+23
-37
lines changed

src/Unicode_Entities.jl

Lines changed: 23 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,45 +15,35 @@ module Unicode_Entities
1515

1616
using StrTables
1717

18-
struct PackedEntities{S,T} <: AbstractPackedTable{String}
19-
offsetvec::Vector{T}
18+
VER = UInt32(1)
19+
20+
struct PackedNames{T,S,O} <: AbstractPackedTable{T}
21+
offsetvec::Vector{O}
2022
namtab::Vector{S}
23+
wrd1::StrTable{T} # This has sorted words for 1-byte
24+
wrd2::StrTable{T} # This has sorted words for 2-byte
2125
end
22-
PackedEntities(tab::PackedTable) = PackedEntities(tab.offsetvec, tab.namtab)
2326

24-
VER = UInt32(1)
27+
Base.getindex(tab::PackedNames, ind::Integer) =
28+
_unpackword(tab.namtab[tab.offsetvec[ind] + 1 : tab.offsetvec[ind+1]], tab.wrd1, tab.wrd2)
2529

26-
struct Unicode_Table{S,T,V}
30+
struct Unicode_Table{S,T,V} <: AbstractEntityTable
2731
ver::UInt32
2832
tim::String
2933
inf::String
3034
base32::UInt32
31-
nam::PackedTable{S,V} # This has packed byte vectors
35+
nam::PackedNames{S,T}
3236
ind::Vector{UInt16}
33-
wrd1::StrTable{T} # This has sorted words for 1-byte
34-
wrd2::StrTable{T} # This has sorted words for 2-byte
3537
val16::Vector{UInt16}
3638
ind16::Vector{UInt16}
3739
val32::Vector{UInt16}
3840
ind32::Vector{UInt16}
3941
end
4042

41-
struct Unicode_Entity <: AbstractEntityTable
42-
tab::Unicode_Table
43-
nam::PackedEntities
44-
end
45-
46-
function Base.getindex(ent::Unicode_Entity, ind::Integer)
47-
str = ent.nam
48-
_unpackword(str.namtab[str.offsetvec[ind] + 1 : str.offsetvec[ind+1]],
49-
ent.tab.wrd1, ent.tab.wrd2)
50-
end
51-
5243
function __init__()
53-
tab = Unicode_Table(StrTables.load(joinpath(Pkg.dir("Unicode_Entities"),
54-
"data", "unicode.dat"))...)
55-
nam = PackedEntities(tab.nam)
56-
global default = Unicode_Entity(tab, nam)
44+
(ver, tim, inf, base32, nam, ind, wrd1, wrd2, val16, ind16, val32, ind32) =
45+
StrTables.load(joinpath(Pkg.dir("Unicode_Entities"), "data", "unicode.dat"))
46+
global default = Unicode_Table(ver, tim, inf, base32, PackedNames(nam, wrd1, wrd2), ind,
5747
end
5848

5949
"""
@@ -101,28 +91,24 @@ end
10191

10292
## Override methods
10393

104-
StrTables._get_table(ent::Unicode_Entity) = ent.tab
105-
StrTables._get_names(ent::Unicode_Entity) = ent
106-
107-
function StrTables._get_str(ent::Unicode_Entity, ind)
108-
tab = ent.tab
94+
function StrTables._get_str(tab::Unicode_Table, ind)
10995
string(Char(ind <= tab.base32 ? tab.val16[ind] : tab.val32[ind - tab.base32] + 0x10000))
11096
end
11197

112-
function StrTables.lookupname(ent::Unicode_Entity, str::AbstractString)
113-
rng = searchsorted(ent.nam, uppercase(str))
114-
isempty(rng) ? StrTables._empty_str : _get_str(ent.tab, ent.tab.ind[rng.start])
98+
function StrTables.lookupname(tab::Unicode_Table, str::AbstractString)
99+
rng = searchsorted(tab.nam, uppercase(str))
100+
isempty(rng) ? StrTables._empty_str : _get_str(tab, tab.ind[rng.start])
115101
end
116102

117-
StrTables.matches(ent::Unicode_Entity, vec::Vector{T}) where {T} =
118-
length(vec) == 1 ? matchchar(ent, vec[1]) : StrTables._empty_str_vec
103+
StrTables.matches(tab::Unicode_Table, vec::Vector{T}) where {T} =
104+
length(vec) == 1 ? matchchar(tab, vec[1]) : StrTables._empty_str_vec
119105

120-
StrTables.longestmatches(ent::Unicode_Entity, vec::Vector{T}) where {T} =
121-
isempty(vec) ? StrTables._empty_str_vec : matchchar(ent, uppercase(vec[1]))
106+
StrTables.longestmatches(tab::Unicode_Table, vec::Vector{T}) where {T} =
107+
isempty(vec) ? StrTables._empty_str_vec : matchchar(tab, uppercase(vec[1]))
122108

123-
function StrTables.completions(ent::Unicode_Entity, str)
109+
function StrTables.completions(tab::Unicode_Table, str)
124110
up = uppercase(str)
125-
[nam for nam in ent.nam if startswith(nam, up)]
111+
[nam for nam in tab.nam if startswith(nam, up)]
126112
end
127113

128114
end # module Unicode_Entities

0 commit comments

Comments
 (0)