diff --git a/src/XML.jl b/src/XML.jl index 9027ade..273bfda 100644 --- a/src/XML.jl +++ b/src/XML.jl @@ -149,8 +149,8 @@ end function Node(o::Node, x...; kw...) attrs = !isnothing(kw) ? merge( - OrderedDict(string(k) => string(v) for (k,v) in pairs(kw)), - isnothing(o.attributes) ? OrderedDict{String, String}() : o.attributes + OrderedDict(string(k) => string(v) for (k, v) in pairs(kw)), + isnothing(o.attributes) ? OrderedDict{String,String}() : o.attributes ) : o.attributes children = isempty(x) ? o.children : vcat(isnothing(o.children) ? [] : o.children, collect(x)) @@ -357,7 +357,7 @@ write(x; kw...) = (io = IOBuffer(); write(io, x; kw...); String(take!(io))) write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w") -function write(io::IO, x; indentsize::Int=2, depth::Int=1) +function write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1) indent = ' ' ^ indentsize nodetype = XML.nodetype(x) tag = XML.tag(x) @@ -365,48 +365,61 @@ function write(io::IO, x; indentsize::Int=2, depth::Int=1) children = XML.children(x) padding = indent ^ max(0, depth - 1) - print(io, padding) + !ctx[end] && print(io, padding) + if nodetype === Text print(io, value) + elseif nodetype === Element + push!(ctx, ctx[end]) + update_ctx!(ctx, x) print(io, '<', tag) _print_attrs(io, x) print(io, isempty(children) ? '/' : "", '>') if !isempty(children) if length(children) == 1 && XML.nodetype(only(children)) === Text - write(io, only(children); indentsize=0) + write(io, only(children), ctx; indentsize=0) print(io, "') else - println(io) + !ctx[end] && println(io) foreach(children) do child - write(io, child; indentsize, depth = depth + 1) - println(io) + write(io, child, ctx; indentsize, depth=depth + 1) + !ctx[end] && println(io) end - print(io, padding, "') + print(io, !ctx[end] ? padding : "", "') end end + pop!(ctx) + elseif nodetype === DTD print(io, "') + elseif nodetype === Declaration print(io, "") + elseif nodetype === ProcessingInstruction print(io, "") + elseif nodetype === Comment print(io, "") + elseif nodetype === CData print(io, "") + elseif nodetype === Document foreach(children) do child - write(io, child; indentsize) - println(io) + write(io, child, ctx; indentsize) + !ctx[end] && println(io) end + else error("Unreachable case reached during XML.write") end -end end + +end # module XML diff --git a/src/raw.jl b/src/raw.jl index 8b77bba..29d0a10 100644 --- a/src/raw.jl +++ b/src/raw.jl @@ -146,7 +146,6 @@ function get_attributes(data, i, j) out = OrderedDict{String,String}() while !isnothing(i) && i < j key, i = get_name(data, i) - #haskey(out, key) && error("Duplicate attribute name found: $key") # would this be useful? # get quotechar the value is wrapped in (either ' or ") i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1) quotechar = data[i] @@ -329,7 +328,11 @@ function parent(o::Raw) end #-----------------------------------------------------------------------------# next Raw -isspace(x::UInt8) = Base.isspace(Char(x)) +# isspace(x::UInt8) = Base.isspace(Char(x)) + +# XML whitespace per XML 1.0/1.1 production S: +# S ::= (#x20 | #x9 | #xD | #xA)+ +@inline xml_isspace(b::UInt8)::Bool = (b == 0x20) | (b == 0x09) | (b == 0x0A) | (b == 0x0D) """ next(node) --> typeof(node) or Nothing @@ -353,7 +356,7 @@ function next_xml_space(o::Raw) has_xml_space = o.has_xml_space ctx = copy(o.ctx) last_type = type - k = findnext(!isspace, data, i) + k = findnext(!xml_isspace, data, i) if isnothing(k) return nothing end @@ -369,11 +372,11 @@ function next_xml_space(o::Raw) if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/' type = RawText j = findnext(==(UInt8('<')), data, i) - 1 - j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed + j = ctx[end] ? j : findprev(!xml_isspace, data, j) # preserving whitespace if needed if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument # Maybe drop pure-whitespace inter-element text nodes? # (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node) - #if all(isspace, @view data[i:j]) && depth > 1 + #if all(xml_isspace, @view data[i:j]) && depth > 1 # return next(Raw(type, depth, j, 0, data, ctx, has_xml_space)) #end end @@ -421,7 +424,7 @@ function next_xml_space(o::Raw) end return Raw(type, depth, i, j - i, data, ctx, has_xml_space) end -# + function next_no_xml_space(o::Raw) # same as v0.3.5 i = o.pos + o.len + 1 depth = o.depth @@ -429,7 +432,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5 type = o.type has_xml_space = o.has_xml_space ctx = [false] - i = findnext(!isspace, data, i) + i = findnext(!xml_isspace, data, i) if isnothing(i) return nothing end @@ -441,7 +444,7 @@ function next_no_xml_space(o::Raw) # same as v0.3.5 if c !== '<' type = RawText j = findnext(==(UInt8('<')), data, i) - 1 - j = findprev(!isspace, data, j) # "rstrip" + j = findprev(!xml_isspace, data, j) # "rstrip" elseif c === '<' c2 = Char(o.data[i+1]) if c2 === '!' @@ -514,7 +517,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5 ctx = has_xml_space ? copy(o.ctx) : [false] type === RawDocument && return nothing j = o.pos - 1 - j = findprev(!isspace, data, j) + j = findprev(!xml_isspace, data, j) if isnothing(j) return Raw(data, has_xml_space, ctx) # RawDocument end @@ -523,7 +526,7 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5 if c !== '>' # text type = RawText i = findprev(==(UInt8('>')), data, j) + 1 - i = findnext(!isspace, data, i) # "lstrip" + i = findnext(!xml_isspace, data, i) # "lstrip" elseif c === '>' c2 = Char(o.data[j-1]) if c2 === '-' @@ -562,3 +565,4 @@ function prev_no_xml_space(o::Raw) # same as v0.3.5 end return Raw(type, depth, i, j - i, data, ctx, has_xml_space) end + diff --git a/test/runtests.jl b/test/runtests.jl index f329c4a..89978eb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -147,13 +147,13 @@ end @test text_content == "hello" n=XML.next(n) text_content = XML.write(n) - @test text_content == "\n hello \n preserve \n" + @test text_content == " hello preserve " n=XML.prev(n) text_content = XML.write(n) @test text_content == "hello" n=XML.next(n) text_content = XML.write(n) - @test text_content == "\n hello \n preserve \n" + @test text_content == " hello preserve " n=XML.next(n) text_content = XML.write(n) @test text_content == " hello " @@ -183,13 +183,13 @@ end @test text_content == " hello " n=XML.prev(n) text_content = XML.write(n) - @test text_content == "\n hello \n preserve \n" + @test text_content == " hello preserve " n=XML.next(n) text_content = XML.write(n) @test text_content == " hello " n=XML.prev(n) text_content = XML.write(n) - @test text_content == "\n hello \n preserve \n" + @test text_content == " hello preserve " n=XML.prev(n) text_content = XML.write(n) @test text_content == "hello" @@ -201,7 +201,7 @@ end @test text_content == "" n=XML.prev(n) text_content = XML.write(n) - @test text_content == "\n \n hello\n \n hello \n preserve \n \n \n \n" + @test text_content == "\n \n hello\n hello preserve \n \n \n" end @testset "depth and parent" begin @@ -428,19 +428,18 @@ end @test XML.value(d2[1][6][1]) == " after default gap " @test XML.value(d2[1][7]) == "\n" end - -# @testset "XML whitespace vs Unicode whitespace" begin -# nbsp = "\u00A0" -# s = """ -# x\t\n -# $(nbsp) y $(nbsp) -# $(nbsp) z $(nbsp) -# """ -# d = XML.parse(XML.Node, s) -# @test XML.value(d[1][1][1]) == "x" -# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" -# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" -# end + @testset "XML whitespace vs Unicode whitespace" begin + nbsp = "\u00A0" + s = """ + x\t\n + $(nbsp) y $(nbsp) + $(nbsp) z $(nbsp) + """ + d = XML.parse(XML.Node, s) + @test XML.value(d[1][1][1]) == "x" + @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" + @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" + end @testset "CDATA/Comment/PI boundaries" begin s = """ @@ -485,17 +484,21 @@ end @test XML.value(d[1][1]) == "a" end -# @testset "entities expanding to whitespace" begin -# s = """ -# a -# b -#  c  -# """ -# d = XML.parse(XML.Node, s) -# @test XML.value(d[1][1][1]) == "a" -# @test XML.value(d[1][2][1]) == " b \n" -# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0" -# end + @testset "entities expanding to whitespace" begin + chr1="\u0020" + chr2="\u000A" + chr3="\u00A0" + + s = """ + $(chr1) a $(chr2) + $(chr1) b $(chr2) + $(chr3)c$(chr3) + """ + d = XML.parse(XML.Node, s) + @test XML.value(d[1][1][1]) == "a" + @test XML.value(d[1][2][1]) == " b \n" + @test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)" + end @testset "invalid values and placement" begin s_bad = """ t """ @@ -534,23 +537,22 @@ end @test reverse(back)[2:end] == toks[1:end-1] end -# @testset "write/read roundtrip extremes" begin - # XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible. -# xml = """ -#

-# -# r -# pre post -#
""" -# n = XML.parse(XML.Node, xml) -# io = IOBuffer(); XML.write(io, n) -# n2 = XML.parse(XML.Node, String(take!(io))) -# @test n == n2 -# @test XML.write(n2[1][1]) == "

" -# @test XML.write(n2[1][2]) == "" -# @test XML.value(n2[1][3][1]) == "r" -# @test XML.write(n2[1][4]) == " pre post " -# end + @testset "write/read roundtrip extremes" begin + xml = """ +

+ + r + pre post +
""" + n = XML.parse(XML.Node, xml) + io = IOBuffer(); XML.write(io, n) + n2 = XML.parse(XML.Node, String(take!(io))) + @test n == n2 + @test XML.write(n2[1][1]) == "

" + @test XML.write(n2[1][2]) == "" + @test XML.value(n2[1][3][1]) == "r" + @test XML.write(n2[1][4]) == " pre post " + end @testset "self-closing/empty/whitespace-only children" begin s = """ @@ -641,3 +643,4 @@ end xyz = XML.Element("point"; kw...) @test collect(keys(attributes(xyz))) == string.(collect('a':'z')) end +