Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions src/XML.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ end
function Node(o::Node, x...; kw...)
attrs = !isnothing(kw) ?
merge(
OrderedDict(string(k) => string(v) for (k,v) in pairs(kw)),
isnothing(o.attributes) ? OrderedDict{String, String}() : o.attributes
OrderedDict(string(k) => string(v) for (k, v) in pairs(kw)),
isnothing(o.attributes) ? OrderedDict{String,String}() : o.attributes
) :
o.attributes
children = isempty(x) ? o.children : vcat(isnothing(o.children) ? [] : o.children, collect(x))
Expand Down Expand Up @@ -357,56 +357,69 @@ write(x; kw...) = (io = IOBuffer(); write(io, x; kw...); String(take!(io)))

write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w")

"""
    write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1)

Serialize the XML node `x` to `io`.

# Arguments
- `io`: destination stream.
- `x`: any node supporting `XML.nodetype`, `XML.tag`, `XML.value` and `XML.children`.
- `ctx`: stack of flags, one per open element. While the top entry is `true`, no
  indentation or line breaks are inserted around nodes. The default is evaluated anew on
  each call, so the two-positional-argument form `write(io, x; kw...)` keeps working.

# Keywords
- `indentsize`: number of spaces per nesting level.
- `depth`: nesting level of `x`; level 1 gets no leading padding.

CDATA sections are written with the case-sensitive `<![CDATA[` delimiter required by the
XML specification.
"""
function write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1)
    indent = ' '^indentsize
    nodetype = XML.nodetype(x)
    tag = XML.tag(x)
    value = XML.value(x)
    children = XML.children(x)

    padding = indent^max(0, depth - 1)
    # Leading padding is governed by the *enclosing* element's flag.
    !ctx[end] && print(io, padding)

    if nodetype === Text
        print(io, value)

    elseif nodetype === Element
        # Inherit the parent's flag, then let the element itself adjust it.
        # NOTE(review): update_ctx! presumably inspects the element's xml:space
        # attribute -- confirm against its definition.
        push!(ctx, ctx[end])
        update_ctx!(ctx, x)
        print(io, '<', tag)
        _print_attrs(io, x)
        print(io, isempty(children) ? '/' : "", '>')
        if !isempty(children)
            if length(children) == 1 && XML.nodetype(only(children)) === Text
                # A lone text child is written inline: <tag>text</tag>
                write(io, only(children), ctx; indentsize=0)
                print(io, "</", tag, '>')
            else
                !ctx[end] && println(io)
                foreach(children) do child
                    write(io, child, ctx; indentsize, depth=depth + 1)
                    !ctx[end] && println(io)
                end
                print(io, !ctx[end] ? padding : "", "</", tag, '>')
            end
        end
        # Restore the enclosing element's flag.
        pop!(ctx)

    elseif nodetype === DTD
        print(io, "<!DOCTYPE ", value, '>')

    elseif nodetype === Declaration
        print(io, "<?xml")
        _print_attrs(io, x)
        print(io, "?>")

    elseif nodetype === ProcessingInstruction
        print(io, "<?", tag)
        _print_attrs(io, x)
        print(io, "?>")

    elseif nodetype === Comment
        print(io, "<!--", value, "-->")

    elseif nodetype === CData
        # The CDStart delimiter is case-sensitive; "<![CData[" is not well-formed XML.
        print(io, "<![CDATA[", value, "]]>")

    elseif nodetype === Document
        foreach(children) do child
            write(io, child, ctx; indentsize)
            !ctx[end] && println(io)
        end

    else
        error("Unreachable case reached during XML.write")
    end
end

end # module XML
24 changes: 14 additions & 10 deletions src/raw.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ function get_attributes(data, i, j)
out = OrderedDict{String,String}()
while !isnothing(i) && i < j
key, i = get_name(data, i)
#haskey(out, key) && error("Duplicate attribute name found: $key") # would this be useful?
# get quotechar the value is wrapped in (either ' or ")
i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1)
quotechar = data[i]
Expand Down Expand Up @@ -329,7 +328,11 @@ function parent(o::Raw)
end

#-----------------------------------------------------------------------------# next Raw
isspace(x::UInt8) = Base.isspace(Char(x))
# isspace(x::UInt8) = Base.isspace(Char(x))

# Byte-level XML whitespace test. Production S of XML 1.0/1.1 admits exactly four
# characters: tab (#x9), line feed (#xA), carriage return (#xD) and space (#x20).
@inline xml_isspace(b::UInt8)::Bool = b in (0x09, 0x0A, 0x0D, 0x20)

"""
next(node) --> typeof(node) or Nothing
Expand All @@ -353,7 +356,7 @@ function next_xml_space(o::Raw)
has_xml_space = o.has_xml_space
ctx = copy(o.ctx)
last_type = type
k = findnext(!isspace, data, i)
k = findnext(!xml_isspace, data, i)
if isnothing(k)
return nothing
end
Expand All @@ -369,11 +372,11 @@ function next_xml_space(o::Raw)
if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/'
type = RawText
j = findnext(==(UInt8('<')), data, i) - 1
j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed
j = ctx[end] ? j : findprev(!xml_isspace, data, j) # preserving whitespace if needed
if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument
# Maybe drop pure-whitespace inter-element text nodes?
# (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node)
#if all(isspace, @view data[i:j]) && depth > 1
#if all(xml_isspace, @view data[i:j]) && depth > 1
# return next(Raw(type, depth, j, 0, data, ctx, has_xml_space))
#end
end
Expand Down Expand Up @@ -421,15 +424,15 @@ function next_xml_space(o::Raw)
end
return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end
#

function next_no_xml_space(o::Raw) # same as v0.3.5
i = o.pos + o.len + 1
depth = o.depth
data = o.data
type = o.type
has_xml_space = o.has_xml_space
ctx = [false]
i = findnext(!isspace, data, i)
i = findnext(!xml_isspace, data, i)
if isnothing(i)
return nothing
end
Expand All @@ -441,7 +444,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5
if c !== '<'
type = RawText
j = findnext(==(UInt8('<')), data, i) - 1
j = findprev(!isspace, data, j) # "rstrip"
j = findprev(!xml_isspace, data, j) # "rstrip"
elseif c === '<'
c2 = Char(o.data[i+1])
if c2 === '!'
Expand Down Expand Up @@ -514,7 +517,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
ctx = has_xml_space ? copy(o.ctx) : [false]
type === RawDocument && return nothing
j = o.pos - 1
j = findprev(!isspace, data, j)
j = findprev(!xml_isspace, data, j)
if isnothing(j)
return Raw(data, has_xml_space, ctx) # RawDocument
end
Expand All @@ -523,7 +526,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
if c !== '>' # text
type = RawText
i = findprev(==(UInt8('>')), data, j) + 1
i = findnext(!isspace, data, i) # "lstrip"
i = findnext(!xml_isspace, data, i) # "lstrip"
elseif c === '>'
c2 = Char(o.data[j-1])
if c2 === '-'
Expand Down Expand Up @@ -562,3 +565,4 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
end
return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end

95 changes: 49 additions & 46 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ end
@test text_content == "hello"
n=XML.next(n)
text_content = XML.write(n)
@test text_content == "<text3 xml:space=\"preserve\">\n hello \n <text3b> preserve </text3b>\n</text3>"
@test text_content == "<text3 xml:space=\"preserve\"> hello <text3b> preserve </text3b></text3>"
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "hello"
n=XML.next(n)
text_content = XML.write(n)
@test text_content == "<text3 xml:space=\"preserve\">\n hello \n <text3b> preserve </text3b>\n</text3>"
@test text_content == "<text3 xml:space=\"preserve\"> hello <text3b> preserve </text3b></text3>"
n=XML.next(n)
text_content = XML.write(n)
@test text_content == " hello "
Expand Down Expand Up @@ -183,13 +183,13 @@ end
@test text_content == " hello "
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "<text3 xml:space=\"preserve\">\n hello \n <text3b> preserve </text3b>\n</text3>"
@test text_content == "<text3 xml:space=\"preserve\"> hello <text3b> preserve </text3b></text3>"
n=XML.next(n)
text_content = XML.write(n)
@test text_content == " hello "
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "<text3 xml:space=\"preserve\">\n hello \n <text3b> preserve </text3b>\n</text3>"
@test text_content == "<text3 xml:space=\"preserve\"> hello <text3b> preserve </text3b></text3>"
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "hello"
Expand All @@ -201,7 +201,7 @@ end
@test text_content == "<text/>"
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "<root>\n <text/>\n <text2>hello</text2>\n <text3 xml:space=\"preserve\">\n hello \n <text3b> preserve </text3b>\n </text3>\n <text4 xml:space=\"preserve\"/>\n <text5/>\n</root>"
@test text_content == "<root>\n <text/>\n <text2>hello</text2>\n <text3 xml:space=\"preserve\"> hello <text3b> preserve </text3b></text3>\n <text4 xml:space=\"preserve\"/>\n <text5/>\n</root>"
end

@testset "depth and parent" begin
Expand Down Expand Up @@ -428,19 +428,18 @@ end
@test XML.value(d2[1][6][1]) == " after default gap "
@test XML.value(d2[1][7]) == "\n"
end

# @testset "XML whitespace vs Unicode whitespace" begin
# nbsp = "\u00A0"
# s = """<root>
# <a> x\t\n </a>
# <b>$(nbsp) y $(nbsp)</b>
# <c xml:space="default">$(nbsp) z $(nbsp)</c>
# </root>"""
# d = XML.parse(XML.Node, s)
# @test XML.value(d[1][1][1]) == "x"
# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
# end
# Checks that only XML whitespace (space, tab, CR, LF) is trimmed from text nodes,
# while U+00A0 (no-break space) is kept as content.
@testset "XML whitespace vs Unicode whitespace" begin
nbsp = "\u00A0"
# <a> is padded with XML whitespace only; <b> and <c> are wrapped in no-break spaces.
s = """<root>
<a> x\t\n </a>
<b>$(nbsp) y $(nbsp)</b>
<c xml:space="default">$(nbsp) z $(nbsp)</c>
</root>"""
d = XML.parse(XML.Node, s)
# XML whitespace around "x" is expected to be stripped.
@test XML.value(d[1][1][1]) == "x"
# The no-break spaces (and the plain spaces between them and the letter) must survive,
# both without an xml:space attribute and with an explicit xml:space="default".
@test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
@test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
end

@testset "CDATA/Comment/PI boundaries" begin
s = """<root>
Expand Down Expand Up @@ -485,17 +484,21 @@ end
@test XML.value(d[1][1]) == "a"
end

# @testset "entities expanding to whitespace" begin
# s = """<root>
# <a> &#x20; a &#x0A; </a>
# <b xml:space="preserve">&#x20; b &#x0A;</b>
# <c>&#xA0;c&#xA0;</c>
# </root>"""
# d = XML.parse(XML.Node, s)
# @test XML.value(d[1][1][1]) == "a"
# @test XML.value(d[1][2][1]) == " b \n"
# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0"
# end
# NOTE(review): despite the testset name, the document below embeds the literal
# characters U+0020, U+000A and U+00A0 through string interpolation; no numeric
# character references are parsed here.
@testset "entities expanding to whitespace" begin
chr1="\u0020"
chr2="\u000A"
chr3="\u00A0"

s = """<root>
<a> $(chr1) a $(chr2) </a>
<b xml:space="preserve">$(chr1) b $(chr2)</b>
<c>$(chr3)c$(chr3)</c>
</root>"""
d = XML.parse(XML.Node, s)
# Without xml:space="preserve": surrounding space/newline characters are trimmed.
@test XML.value(d[1][1][1]) == "a"
# With xml:space="preserve": the text is expected back verbatim.
@test XML.value(d[1][2][1]) == " b \n"
# U+00A0 is expected to be kept even though <c> does not preserve whitespace.
@test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)"
end

@testset "invalid values and placement" begin
s_bad = """<root><x xml:space="weird"> t </x></root>"""
Expand Down Expand Up @@ -534,23 +537,22 @@ end
@test reverse(back)[2:end] == toks[1:end-1]
end

# @testset "write/read roundtrip extremes" begin
# XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible.
# xml = """<root>
# <p xml:space="preserve"> </p>
# <q> </q>
# <r xml:space="default"> r </r>
# <s xml:space="preserve"> pre <t/> post </s>
# </root>"""
# n = XML.parse(XML.Node, xml)
# io = IOBuffer(); XML.write(io, n)
# n2 = XML.parse(XML.Node, String(take!(io)))
# @test n == n2
# @test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>"
# @test XML.write(n2[1][2]) == "<q/>"
# @test XML.value(n2[1][3][1]) == "r"
# @test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>"
# end
# Round-trip check: parse a document, write it back out, parse the output again and
# require the two node trees to compare equal.
@testset "write/read roundtrip extremes" begin
# Fixture mixes whitespace-only content (with and without xml:space="preserve"),
# an explicit xml:space="default", and preserved mixed content around a child element.
xml = """<root>
<p xml:space="preserve"> </p>
<q> </q>
<r xml:space="default"> r </r>
<s xml:space="preserve"> pre <t/> post </s>
</root>"""
n = XML.parse(XML.Node, xml)
io = IOBuffer(); XML.write(io, n)
n2 = XML.parse(XML.Node, String(take!(io)))
@test n == n2
# Preserved whitespace-only content is expected to be written back unchanged.
@test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>"
# Whitespace-only content without preserve is expected to collapse to an empty element.
@test XML.write(n2[1][2]) == "<q/>"
@test XML.value(n2[1][3][1]) == "r"
# Mixed content under preserve is expected to be written inline, with no added
# indentation or line breaks.
@test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>"
end

@testset "self-closing/empty/whitespace-only children" begin
s = """<root>
Expand Down Expand Up @@ -641,3 +643,4 @@ end
xyz = XML.Element("point"; kw...)
@test collect(keys(attributes(xyz))) == string.(collect('a':'z'))
end

Loading