diff --git a/src/XML.jl b/src/XML.jl
index 9027ade..273bfda 100644
--- a/src/XML.jl
+++ b/src/XML.jl
@@ -149,8 +149,8 @@ end
function Node(o::Node, x...; kw...)
attrs = !isnothing(kw) ?
merge(
- OrderedDict(string(k) => string(v) for (k,v) in pairs(kw)),
- isnothing(o.attributes) ? OrderedDict{String, String}() : o.attributes
+ OrderedDict(string(k) => string(v) for (k, v) in pairs(kw)),
+ isnothing(o.attributes) ? OrderedDict{String,String}() : o.attributes
) :
o.attributes
children = isempty(x) ? o.children : vcat(isnothing(o.children) ? [] : o.children, collect(x))
@@ -357,7 +357,7 @@ write(x; kw...) = (io = IOBuffer(); write(io, x; kw...); String(take!(io)))
write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w")
-function write(io::IO, x; indentsize::Int=2, depth::Int=1)
+function write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1)
indent = ' ' ^ indentsize
nodetype = XML.nodetype(x)
tag = XML.tag(x)
@@ -365,48 +365,61 @@ function write(io::IO, x; indentsize::Int=2, depth::Int=1)
children = XML.children(x)
padding = indent ^ max(0, depth - 1)
- print(io, padding)
+ !ctx[end] && print(io, padding)
+
if nodetype === Text
print(io, value)
+
elseif nodetype === Element
+ push!(ctx, ctx[end])
+ update_ctx!(ctx, x)
print(io, '<', tag)
_print_attrs(io, x)
print(io, isempty(children) ? '/' : "", '>')
if !isempty(children)
if length(children) == 1 && XML.nodetype(only(children)) === Text
- write(io, only(children); indentsize=0)
+ write(io, only(children), ctx; indentsize=0)
print(io, "", tag, '>')
else
- println(io)
+ !ctx[end] && println(io)
foreach(children) do child
- write(io, child; indentsize, depth = depth + 1)
- println(io)
+ write(io, child, ctx; indentsize, depth=depth + 1)
+ !ctx[end] && println(io)
end
- print(io, padding, "", tag, '>')
+ print(io, !ctx[end] ? padding : "", "", tag, '>')
end
end
+ pop!(ctx)
+
elseif nodetype === DTD
print(io, "')
+
elseif nodetype === Declaration
print(io, "")
+
elseif nodetype === ProcessingInstruction
print(io, "", tag)
_print_attrs(io, x)
print(io, "?>")
+
elseif nodetype === Comment
print(io, "")
+
elseif nodetype === CData
print(io, "")
+
elseif nodetype === Document
foreach(children) do child
- write(io, child; indentsize)
- println(io)
+ write(io, child, ctx; indentsize)
+ !ctx[end] && println(io)
end
+
else
error("Unreachable case reached during XML.write")
end
-end
end
+
+end # module XML
diff --git a/src/raw.jl b/src/raw.jl
index 8b77bba..29d0a10 100644
--- a/src/raw.jl
+++ b/src/raw.jl
@@ -146,7 +146,6 @@ function get_attributes(data, i, j)
out = OrderedDict{String,String}()
while !isnothing(i) && i < j
key, i = get_name(data, i)
- #haskey(out, key) && error("Duplicate attribute name found: $key") # would this be useful?
# get quotechar the value is wrapped in (either ' or ")
i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1)
quotechar = data[i]
@@ -329,7 +328,11 @@ function parent(o::Raw)
end
#-----------------------------------------------------------------------------# next Raw
-isspace(x::UInt8) = Base.isspace(Char(x))
+# Replaces the old isspace(x::UInt8) = Base.isspace(Char(x)), which also matched non-XML bytes (0x0B, 0x0C, 0x85, 0xA0).
+
+# XML whitespace per XML 1.0/1.1 production S:
+# S ::= (#x20 | #x9 | #xD | #xA)+
+@inline xml_isspace(b::UInt8)::Bool = (b == 0x20) | (b == 0x09) | (b == 0x0A) | (b == 0x0D)
"""
next(node) --> typeof(node) or Nothing
@@ -353,7 +356,7 @@ function next_xml_space(o::Raw)
has_xml_space = o.has_xml_space
ctx = copy(o.ctx)
last_type = type
- k = findnext(!isspace, data, i)
+ k = findnext(!xml_isspace, data, i)
if isnothing(k)
return nothing
end
@@ -369,11 +372,11 @@ function next_xml_space(o::Raw)
if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/'
type = RawText
j = findnext(==(UInt8('<')), data, i) - 1
- j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed
+ j = ctx[end] ? j : findprev(!xml_isspace, data, j) # preserving whitespace if needed
if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument
# Maybe drop pure-whitespace inter-element text nodes?
# (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node)
- #if all(isspace, @view data[i:j]) && depth > 1
+ #if all(xml_isspace, @view data[i:j]) && depth > 1
# return next(Raw(type, depth, j, 0, data, ctx, has_xml_space))
#end
end
@@ -421,7 +424,7 @@ function next_xml_space(o::Raw)
end
return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end
-#
+
function next_no_xml_space(o::Raw) # same as v0.3.5
i = o.pos + o.len + 1
depth = o.depth
@@ -429,7 +432,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5
type = o.type
has_xml_space = o.has_xml_space
ctx = [false]
- i = findnext(!isspace, data, i)
+ i = findnext(!xml_isspace, data, i)
if isnothing(i)
return nothing
end
@@ -441,7 +444,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5
if c !== '<'
type = RawText
j = findnext(==(UInt8('<')), data, i) - 1
- j = findprev(!isspace, data, j) # "rstrip"
+ j = findprev(!xml_isspace, data, j) # "rstrip"
elseif c === '<'
c2 = Char(o.data[i+1])
if c2 === '!'
@@ -514,7 +517,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
ctx = has_xml_space ? copy(o.ctx) : [false]
type === RawDocument && return nothing
j = o.pos - 1
- j = findprev(!isspace, data, j)
+ j = findprev(!xml_isspace, data, j)
if isnothing(j)
return Raw(data, has_xml_space, ctx) # RawDocument
end
@@ -523,7 +526,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
if c !== '>' # text
type = RawText
i = findprev(==(UInt8('>')), data, j) + 1
- i = findnext(!isspace, data, i) # "lstrip"
+ i = findnext(!xml_isspace, data, i) # "lstrip"
elseif c === '>'
c2 = Char(o.data[j-1])
if c2 === '-'
@@ -562,3 +565,4 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5
end
return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end
+
diff --git a/test/runtests.jl b/test/runtests.jl
index f329c4a..89978eb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -147,13 +147,13 @@ end
@test text_content == "hello"
n=XML.next(n)
text_content = XML.write(n)
- @test text_content == "\n hello \n preserve \n"
+ @test text_content == " hello preserve "
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "hello"
n=XML.next(n)
text_content = XML.write(n)
- @test text_content == "\n hello \n preserve \n"
+ @test text_content == " hello preserve "
n=XML.next(n)
text_content = XML.write(n)
@test text_content == " hello "
@@ -183,13 +183,13 @@ end
@test text_content == " hello "
n=XML.prev(n)
text_content = XML.write(n)
- @test text_content == "\n hello \n preserve \n"
+ @test text_content == " hello preserve "
n=XML.next(n)
text_content = XML.write(n)
@test text_content == " hello "
n=XML.prev(n)
text_content = XML.write(n)
- @test text_content == "\n hello \n preserve \n"
+ @test text_content == " hello preserve "
n=XML.prev(n)
text_content = XML.write(n)
@test text_content == "hello"
@@ -201,7 +201,7 @@ end
@test text_content == ""
n=XML.prev(n)
text_content = XML.write(n)
- @test text_content == "\n \n hello\n \n hello \n preserve \n \n \n \n"
+ @test text_content == "\n \n hello\n hello preserve \n \n \n"
end
@testset "depth and parent" begin
@@ -428,19 +428,18 @@ end
@test XML.value(d2[1][6][1]) == " after default gap "
@test XML.value(d2[1][7]) == "\n"
end
-
-# @testset "XML whitespace vs Unicode whitespace" begin
-# nbsp = "\u00A0"
-# s = """
-# x\t\n
-# $(nbsp) y $(nbsp)
-# $(nbsp) z $(nbsp)
-# """
-# d = XML.parse(XML.Node, s)
-# @test XML.value(d[1][1][1]) == "x"
-# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
-# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
-# end
+ @testset "XML whitespace vs Unicode whitespace" begin
+ nbsp = "\u00A0"
+ s = """
+ x\t\n
+ $(nbsp) y $(nbsp)
+ $(nbsp) z $(nbsp)
+ """
+ d = XML.parse(XML.Node, s)
+ @test XML.value(d[1][1][1]) == "x"
+ @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
+ @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
+ end
@testset "CDATA/Comment/PI boundaries" begin
s = """
@@ -485,17 +484,21 @@ end
@test XML.value(d[1][1]) == "a"
end
-# @testset "entities expanding to whitespace" begin
-# s = """
-# a
-# b
-# c
-# """
-# d = XML.parse(XML.Node, s)
-# @test XML.value(d[1][1][1]) == "a"
-# @test XML.value(d[1][2][1]) == " b \n"
-# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0"
-# end
+ @testset "entities expanding to whitespace" begin
+ chr1="\u0020"
+ chr2="\u000A"
+ chr3="\u00A0"
+
+ s = """
+ $(chr1) a $(chr2)
+ $(chr1) b $(chr2)
+ $(chr3)c$(chr3)
+ """
+ d = XML.parse(XML.Node, s)
+ @test XML.value(d[1][1][1]) == "a"
+ @test XML.value(d[1][2][1]) == " b \n"
+ @test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)"
+ end
@testset "invalid values and placement" begin
s_bad = """ t """
@@ -534,23 +537,22 @@ end
@test reverse(back)[2:end] == toks[1:end-1]
end
-# @testset "write/read roundtrip extremes" begin
- # XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible.
-# xml = """
-#
-#
-# r
-# pre post
-# """
-# n = XML.parse(XML.Node, xml)
-# io = IOBuffer(); XML.write(io, n)
-# n2 = XML.parse(XML.Node, String(take!(io)))
-# @test n == n2
-# @test XML.write(n2[1][1]) == "
"
-# @test XML.write(n2[1][2]) == ""
-# @test XML.value(n2[1][3][1]) == "r"
-# @test XML.write(n2[1][4]) == " pre post "
-# end
+ @testset "write/read roundtrip extremes" begin
+ xml = """
+
+
+ r
+ pre post
+ """
+ n = XML.parse(XML.Node, xml)
+ io = IOBuffer(); XML.write(io, n)
+ n2 = XML.parse(XML.Node, String(take!(io)))
+ @test n == n2
+ @test XML.write(n2[1][1]) == "
"
+ @test XML.write(n2[1][2]) == ""
+ @test XML.value(n2[1][3][1]) == "r"
+ @test XML.write(n2[1][4]) == " pre post "
+ end
@testset "self-closing/empty/whitespace-only children" begin
s = """
@@ -641,3 +643,4 @@ end
xyz = XML.Element("point"; kw...)
@test collect(keys(attributes(xyz))) == string.(collect('a':'z'))
end
+