Skip to content

Commit 9871753

Browse files
authored
Merge pull request #7 from JuliaString/spj/makegeneric
Make API more generic
2 parents 5770aa9 + a912212 commit 9871753

File tree

2 files changed

+54
-55
lines changed

2 files changed

+54
-55
lines changed

src/Unicode_Entities.jl

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ struct PackedEntities{S,T} <: AbstractPackedTable{String}
2121
end
2222
PackedEntities(tab::PackedTable) = PackedEntities(tab.offsetvec, tab.namtab)
2323
# Decode the `ind`-th packed name. `offsetvec[ind]+1 : offsetvec[ind+1]` delimits the bytes
# of that name inside `namtab`.
# The word tables needed for unpacking are taken from the module-level `default` entity
# table assigned in `__init__`; the `_tab` global used previously is no longer assigned
# anywhere in the visible code, so referencing it would raise `UndefVarError`.
# NOTE(review): this ties every PackedEntities to the default table's word lists — confirm
# no other tables are constructed, or store the word tables in PackedEntities itself.
Base.getindex(str::PackedEntities, ind::Integer) =
    _unpackword(default.tab, str.namtab[str.offsetvec[ind]+1:str.offsetvec[ind+1]])
2525

2626
VER = UInt32(1)
2727

28-
struct Unicode_Table{S,T,V} <: AbstractEntityTable
28+
struct Unicode_Table{S,T,V}
2929
ver::UInt32
3030
tim::String
3131
inf::String
@@ -40,14 +40,17 @@ struct Unicode_Table{S,T,V} <: AbstractEntityTable
4040
ind32::Vector{UInt16}
4141
end
4242

43-
function __init__()
44-
global _tab =
45-
Unicode_Table(StrTables.load(joinpath(Pkg.dir("Unicode_Entities"), "data", "unicode.dat"))...)
46-
global _names = PackedEntities(_tab.nam)
43+
"""
    Unicode_Entity(tab, nam)

Entity table passed as the first argument to the `StrTables` query functions.

`tab` is the loaded `Unicode_Table` and `nam` is the `PackedEntities` built from its names.
Both fields are type parameters so that field access is concretely typed; construction via
`Unicode_Entity(tab, nam)` and dispatch on `::Unicode_Entity` are unchanged.
"""
struct Unicode_Entity{T<:Unicode_Table,N<:PackedEntities} <: AbstractEntityTable
    tab::T
    nam::N
end
4847

49-
const _empty_str = ""
50-
const _empty_str_vec = Vector{String}()
48+
# Module initializer: loads the packed Unicode table from the package's `data` directory
# and publishes it as the global `default` entity table.
function __init__()
    # Resolve the data file relative to this source file (src/) rather than via `Pkg.dir`,
    # which only finds packages in the default package directory and is deprecated in
    # later Julia releases.
    datafile = joinpath(@__DIR__, "..", "data", "unicode.dat")
    tab = Unicode_Table(StrTables.load(datafile)...)
    nam = PackedEntities(tab.nam)
    global default = Unicode_Entity(tab, nam)
    return nothing
end
5154

5255
"""
5356
Internal function to unpack the packed Unicode entity names
@@ -62,7 +65,6 @@ Unicode has a very limited character set for the entity names, 0-9, A-Z, -, (, a
6265
38-53 represents up to 16*256 words stored in the wrd2 table
6366
54-255 represent 202 words stored in the wrd1 table
6467
"""
65-
_unpackword(v::Vector{UInt8}) = _unpackword(v, _tab.wrd1, _tab.wrd2)
6668
function _unpackword(v::Vector{UInt8}, w1, w2)
6769
io = IOBuffer()
6870
pos = 0
@@ -90,39 +92,32 @@ function _unpackword(v::Vector{UInt8}, w1, w2)
9092
prevw = true
9193
end
9294
end
93-
@static VERSION < v"0.6-" ? takebuf_string(io) : String(take!(io))
95+
String(take!(io))
9496
end
97+
# Convenience method: unpack `v` using the two word tables (`wrd1`, `wrd2`) held by `tab`
function _unpackword(tab, v::Vector{UInt8})
    return _unpackword(v, tab.wrd1, tab.wrd2)
end
9598

96-
_get_str(ind) =
97-
string(Char(ind <= _tab.base32 ? _tab.val16[ind] : _tab.val32[ind - _tab.base32] + 0x10000))
98-
99-
function _get_strings(val::T, tab::Vector{T}, ind::Vector{UInt16}) where {T}
100-
rng = searchsorted(tab, val)
101-
isempty(rng) && return _empty_str_vec
102-
_names[ind[rng]]
103-
end
99+
## Override methods
104100

105-
function lookupname(str::AbstractString)
106-
rng = searchsorted(_names, uppercase(str))
107-
isempty(rng) ? _empty_str : _get_str(_tab.ind[rng.start])
101+
# Accessor: the underlying Unicode table of an entity
function StrTables._get_table(ent::Unicode_Entity)
    return ent.tab
end

# Accessor: the packed name table of an entity
function StrTables._get_names(ent::Unicode_Entity)
    return ent.nam
end
103+
104+
# Return the one-character string for table index `ind`.
# Indices up to `base32` select a value from `val16`; larger indices select from `val32`
# (offset by `base32`), whose entries have 0x10000 added back to form the code point.
function StrTables._get_str(ent::Unicode_Entity, ind)
    tbl = ent.tab
    code = if ind <= tbl.base32
        tbl.val16[ind]
    else
        tbl.val32[ind - tbl.base32] + 0x10000
    end
    return string(Char(code))
end
109108

110-
matchchar(ch::UInt32) =
111-
(ch <= 0x0ffff
112-
? _get_strings(ch%UInt16, _tab.val16, _tab.ind16)
113-
: (ch <= 0x1ffff ? _get_strings(ch%UInt16, _tab.val32, _tab.ind32) : _empty_str_vec))
114-
matchchar(ch::Char) = matchchar(UInt32(ch))
109+
# Look up the character for the entity name `str` (compared case-insensitively by
# uppercasing it). Returns `StrTables._empty_str` when the name is not in the table.
#
# Written as a `function ... end` block: the previous short-form `=` definition followed
# by several statements and a trailing `end` does not parse.
function StrTables.lookupname(tab::Unicode_Entity, str::AbstractString)
    rng = searchsorted(tab.nam, uppercase(str))
    isempty(rng) && return StrTables._empty_str
    # `_get_str` is defined for the entity wrapper, not for the raw table, so pass `tab`
    return StrTables._get_str(tab, tab.tab.ind[first(rng)])
end
115113

116-
matches(str::AbstractString) = matches(convert(Vector{Char}, str))
117-
matches(vec::Vector{Char}) = length(vec) == 1 ? matchchar(vec[1]) : _empty_str_vec
114+
# Names matching `vec` exactly: only a single-element vector can match, since each
# entity here stands for one character; anything else yields the shared empty vector.
function StrTables.matches(ent::Unicode_Entity, vec::Vector{T}) where {T}
    if length(vec) == 1
        return matchchar(ent, vec[1])
    end
    return StrTables._empty_str_vec
end
118116

119-
longestmatches(str::AbstractString) = longestmatches(convert(Vector{Char},str))
120-
longestmatches(vec::Vector{Char}) = isempty(vec) ? _empty_str_vec : matchchar(uppercase(vec[1]))
117+
# Names of the entity matching the longest prefix of `vec`. Every entity here is a single
# character, so only the first element is considered; empty input yields the shared empty
# vector.
#
# The first element is looked up as-is. Uppercasing it (as done before) changes which
# character is looked up (e.g. 'a' would report the names of 'A'), disagrees with
# `matches`, and throws for element types without an `uppercase` method.
function StrTables.longestmatches(ent::Unicode_Entity, vec::Vector{T}) where {T}
    isempty(vec) && return StrTables._empty_str_vec
    return matchchar(ent, vec[1])
end
121119

122-
completions(str::AbstractString) = completions(String(str))
123-
function completions(str::String)
124-
str = uppercase(str)
125-
[nam for nam in _names if startswith(nam, str)]
126-
end
120+
# All entity names that start with `str`.
# The prefix is uppercased first, as `lookupname` does for names, so that lowercase input
# still matches; the earlier non-generic implementation did the same.
function StrTables.completions(ent::Unicode_Entity, str)
    prefix = uppercase(str)
    return [nam for nam in ent.nam if startswith(nam, prefix)]
end
127122

128123
end # module Unicode_Entities

test/runtests.jl

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ using Unicode_Entities
77

88
UE = Unicode_Entities
99

10+
# Test helpers: run each query against the package's default entity table (`UE.default`),
# which the API functions take as their first argument.
function ue_matchchar(ch)
    return UE.matchchar(UE.default, ch)
end

function ue_lookupname(nam)
    return UE.lookupname(UE.default, nam)
end

function ue_longestmatches(str)
    return UE.longestmatches(UE.default, str)
end

function ue_matches(str)
    return UE.matches(UE.default, str)
end

function ue_completions(str)
    return UE.completions(UE.default, str)
end
15+
1016
# Package data directory, resolved relative to this test file (test/) instead of through
# `Pkg.dir()`, which only works for packages installed in the default package directory
# and is deprecated in later Julia releases.
const datapath = joinpath(@__DIR__, "..", "data")
1117
const dpath = "ftp://ftp.unicode.org/Public/UNIDATA/"
1218
const fname = "UnicodeData.txt"
@@ -51,67 +57,65 @@ function load_unicode_data()
5157
end
5258

5359
load_unicode_data()
54-
uln = UE.lookupname
55-
umc = UE.matchchar
5660

5761
@testset "Unicode_Entities" begin
5862

5963
@testset "matches data file" begin
6064
for (i, ch) in enumerate(symval)
61-
list = umc(ch)
65+
list = ue_matchchar(ch)
6266
if !isempty(list)
6367
@test symnam[i] in list
6468
end
6569
end
6670
for (i, nam) in enumerate(symnam)
67-
str = uln(nam)
71+
str = ue_lookupname(nam)
6872
if str != ""
6973
@test symval[i] == str[1]
7074
end
7175
end
7276
end
7377

7478
@testset "lookupname" begin
75-
@test UE.lookupname("foobar") == ""
76-
@test UE.lookupname(SubString("My name is Spock", 12)) == ""
77-
@test UE.lookupname("end of text") == "\x03" # \3
78-
@test UE.lookupname("TIBETAN LETTER -A") == "\u0f60"
79-
@test UE.lookupname("LESS-THAN OR SLANTED EQUAL TO") == "\u2a7d"
80-
@test UE.lookupname("REVERSED HAND WITH MIDDLE FINGER EXTENDED") == "\U1f595"
79+
@test ue_lookupname("foobar") == ""
80+
@test ue_lookupname(SubString("My name is Spock", 12)) == ""
81+
@test ue_lookupname("end of text") == "\x03" # \3
82+
@test ue_lookupname("TIBETAN LETTER -A") == "\u0f60"
83+
@test ue_lookupname("LESS-THAN OR SLANTED EQUAL TO") == "\u2a7d"
84+
@test ue_lookupname("REVERSED HAND WITH MIDDLE FINGER EXTENDED") == "\U1f595"
8185
end
8286

8387
@testset "matches" begin
84-
@test isempty(UE.matches(""))
85-
@test isempty(UE.matches("\uf900"))
86-
@test isempty(UE.matches(SubString("This is \uf900", 9)))
88+
@test isempty(ue_matches(""))
89+
@test isempty(ue_matches("\uf900"))
90+
@test isempty(ue_matches(SubString("This is \uf900", 9)))
8791
for (chrs, exp) in (("\U1f596", ["RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS"]),
8892
("\u0f4a", ["TIBETAN LETTER REVERSED TA"]),
8993
(".", ["FULL STOP", "PERIOD"]))
90-
res = UE.matches(chrs)
94+
res = ue_matches(chrs)
9195
@test length(res) >= length(exp)
9296
@test intersect(res, exp) == exp
9397
end
9498
end
9599

96100
@testset "longestmatches" begin
97-
@test isempty(UE.longestmatches("\uf900 abcd"))
98-
@test isempty(UE.longestmatches(SubString("This is \uf900 abcd", 9)))
101+
@test isempty(ue_longestmatches("\uf900 abcd"))
102+
@test isempty(ue_longestmatches(SubString("This is \uf900 abcd", 9)))
99103
for (chrs, exp) in (("\U1f596 abcd", ["RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS"]),
100104
(".abcd", ["FULL STOP", "PERIOD"]),
101105
("\u0f4a#123", ["TIBETAN LETTER REVERSED TA", "TIBETAN LETTER TTA"]))
102-
res = UE.longestmatches(chrs)
106+
res = ue_longestmatches(chrs)
103107
@test length(res) >= length(exp)
104108
@test intersect(res, exp) == exp
105109
end
106110
end
107111

108112
@testset "completions" begin
109-
@test isempty(UE.completions("ScottPaulJones"))
110-
@test isempty(UE.completions(SubString("My name is Scott", 12)))
113+
@test isempty(ue_completions("ScottPaulJones"))
114+
@test isempty(ue_completions(SubString("My name is Scott", 12)))
111115
for (chrs, exp) in (("ZERO", ["ZERO WIDTH JOINER", "ZERO WIDTH NO-BREAK SPACE",
112116
"ZERO WIDTH NON-JOINER", "ZERO WIDTH SPACE"]),
113117
("BACK OF", ["BACK OF ENVELOPE"]))
114-
res = UE.completions(chrs)
118+
res = ue_completions(chrs)
115119
@test length(res) >= length(exp)
116120
@test intersect(res, exp) == exp
117121
end

0 commit comments

Comments
 (0)