Skip to content

Commit fc5b3f6

Browse files
committed
DTD parsing done-ish
1 parent e917cf1 commit fc5b3f6

File tree

1 file changed

+61
-14
lines changed

1 file changed

+61
-14
lines changed

src/dtd.jl

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
#-----------------------------------------------------------------------------# position_after
2+
"""
    position_after(needle::Vector{UInt8}, haystack::Vector{UInt8}, i)

Return the index immediately after the first occurrence of `needle` in `haystack` found at
or after index `i`, or `nothing` when `needle` does not occur.
"""
function position_after(needle::Vector{UInt8}, haystack::Vector{UInt8}, i)
    hit = findnext(needle, haystack, i)
    if isnothing(hit)
        return nothing
    end
    # `hit` is the index range of the match; step one past its last index.
    return last(hit) + 1
end
6+
7+
# Convenience method: convert a `String` needle to its bytes and defer to the
# byte-vector method of `position_after`.
function position_after(needle::String, haystack::Vector{UInt8}, i)
    needle_bytes = Vector{UInt8}(needle)
    return position_after(needle_bytes, haystack, i)
end
8+
9+
10+
11+
112
#-----------------------------------------------------------------------------# DeclaredElement
213
struct DeclaredElement
314
name::String
@@ -11,7 +22,7 @@ end
1122
# Print a `DeclaredElement` as `<!ELEMENT name content>`.
function Base.show(io::IO, o::DeclaredElement)
    print(io, "<!ELEMENT ", o.name, " ", o.content, ">")
end
1223

1324
function get_declared_elements(data::Vector{UInt8})
14-
i = findnext(Vector{UInt8}("<!ELEMENT"), data, 1)[end]
25+
i = position_after("<!ELEMENT", data, 1)
1526
out = DeclaredElement[]
1627
while !isnothing(i)
1728
name, i = get_name(data, i + 1)
@@ -23,8 +34,7 @@ function get_declared_elements(data::Vector{UInt8})
2334
content, i = get_name(data, i)
2435
end
2536
push!(out, DeclaredElement(name, content))
26-
fn = findnext(Vector{UInt8}("<!ELEMENT"), data, i)
27-
i = isnothing(fn) ? nothing : fn[end]
37+
i = position_after("<!ELEMENT", data, i)
2838
end
2939
return out
3040
end
@@ -38,19 +48,56 @@ struct DeclaredAttribute
3848
end
3949
# Print a `DeclaredAttribute` as `<!ATTLIST element attribute type value>`.
function Base.show(io::IO, o::DeclaredAttribute)
    print(
        io,
        "<!ATTLIST ",
        o.element_name,
        " ",
        o.attribute_name,
        " ",
        o.attribute_type,
        " ",
        o.attribute_value,
        ">",
    )
end
4050

51+
4152
"""
    get_declared_attributes(data)

Scan the bytes in `data` for `<!ATTLIST` declarations and return them as a
`Vector{DeclaredAttribute}`.

For every declaration the element name and the attribute name are read with `get_name`.
The attribute type is either a parenthesized list, kept verbatim including the
parentheses, or a name read with `get_name`. The attribute value is read with `get_name`
and is prefixed with `#` when the first non-space byte at that position is `#`.

Throws an error when a declaration ends before its type or value, or when a `(` is never
closed.
"""
function get_declared_attributes(data)
    # Base defines `isspace` for characters only, so convert each byte before testing.
    not_space = b -> !isspace(Char(b))

    out = DeclaredAttribute[]
    i = position_after("<!ATTLIST", data, 1)
    while !isnothing(i)
        element_name, i = get_name(data, i)
        attribute_name, i = get_name(data, i)

        i = findnext(not_space, data, i)
        isnothing(i) && error("Incomplete `<!ATTLIST` declaration: missing attribute type.")
        if data[i] == UInt8('(')
            j = findnext(==(UInt8(')')), data, i)
            isnothing(j) && error("Unclosed `(` in `<!ATTLIST` declaration.")
            # Bind the type explicitly: as the value of an `if` expression, the trailing
            # `i = j + 1` would be what gets assigned, not the string.
            attribute_type = String(data[i:j])
            i = j + 1
        else
            attribute_type, i = get_name(data, i)
        end

        i = findnext(not_space, data, i)
        isnothing(i) && error("Incomplete `<!ATTLIST` declaration: missing attribute value.")
        is_hash = data[i] == UInt8('#')
        # NOTE(review): this assumes `get_name` skips the leading `#` and returns only the
        # name that follows it -- confirm against `get_name`.
        val, i = get_name(data, i)
        attribute_value = is_hash ? '#' * val : val

        push!(out, DeclaredAttribute(element_name, attribute_name, attribute_type, attribute_value))
        i = position_after("<!ATTLIST", data, i)
    end
    return out
end
4476

4577
#-----------------------------------------------------------------------------# DeclaredEntity
4678
"""
    DeclaredEntity(name, external, value)

One `<!ENTITY ...>` declaration.

- `name`: the entity name.
- `external`: `true` when the declaration uses the `SYSTEM` keyword.
- `value`: the text that follows the name (and the `SYSTEM` keyword, when present).
"""
struct DeclaredEntity
    name::String
    external::Bool
    value::String
end

# Print as `<!ENTITY name "value">`, or `<!ENTITY name SYSTEM "value">` for an external
# entity. The keyword carries its own trailing space so it does not run into the quoted
# value.
function Base.show(io::IO, o::DeclaredEntity)
    keyword = o.external ? "SYSTEM " : ""
    print(io, "<!ENTITY ", o.name, " ", keyword, repr(o.value), ">")
end
5186

5287
"""
    get_declared_entities(data)

Scan the bytes in `data` for `<!ENTITY` declarations and return them as a
`Vector{DeclaredEntity}`.

Two tokens are read with `get_name` after each `<!ENTITY`: the entity name and the token
that follows it. When that second token is `SYSTEM`, the entity is flagged as external and
one more token is read to serve as its value.
"""
function get_declared_entities(data)
    entities = DeclaredEntity[]
    pos = position_after("<!ENTITY", data, 1)
    while pos !== nothing
        name, pos = get_name(data, pos)
        token, pos = get_name(data, pos)
        is_external = token == "SYSTEM"
        if is_external
            # The keyword is not the value; the value is the next token.
            token, pos = get_name(data, pos)
        end
        push!(entities, DeclaredEntity(name, is_external, token))
        pos = position_after("<!ENTITY", data, pos)
    end
    return entities
end
55102

56103
#-----------------------------------------------------------------------------# DTDBody
@@ -62,19 +109,19 @@ struct DTDBody
62109
end
63110

64111
# Print a `DTDBody` as a colored header line with the root name, followed by one colored
# heading per declaration kind (with its count) and one line per declaration.
function Base.show(io::IO, o::DTDBody)
    printstyled(io, "DTDBody(root=\"", o.root, "\")\n"; color=:light_cyan)
    sections = (
        (" DeclaredElements (", o.elements),
        (" DeclaredAttributes (", o.attributes),
        (" DeclaredEntities (", o.entities),
    )
    for (heading, declarations) in sections
        printstyled(io, heading, length(declarations), ")\n"; color=:light_green)
        for declaration in declarations
            println(io, " ", declaration)
        end
    end
end
71120

72121

73122

74123
function DTDBody(data::Vector{UInt8})
75-
start = "<!DOCTYPE"
76-
data[1:length(start)] == Vector{UInt8}(start) || error("DTD must start with `<!DOCTYPE`.")
77-
i = length(start) + 1
124+
i = position_after("<!DOCTYPE", data, 1)
78125
root, i = get_name(data, i)
79126

80127
i = findnext(==(UInt8('[')), data, i)

0 commit comments

Comments
 (0)