diff --git a/src/raw.jl b/src/raw.jl index f0ae362..8b77bba 100644 --- a/src/raw.jl +++ b/src/raw.jl @@ -16,23 +16,18 @@ RawDeclaration, RawDTD, RawElementOpen, RawElementClose, RawElementSelfClosed) @inline nodetype(x::RawType) = - x === RawElementOpen ? Element : - x === RawElementClose ? Element : - x === RawElementSelfClosed ? Element : - x === RawText ? Text : - x === RawComment ? Comment : - x === RawCData ? CData : - x === RawDeclaration ? Declaration : - x === RawDTD ? DTD : - x === RawProcessingInstruction ? ProcessingInstruction : - x === RawDocument ? Document : + x === RawElementOpen ? Element : + x === RawElementClose ? Element : + x === RawElementSelfClosed ? Element : + x === RawText ? Text : + x === RawComment ? Comment : + x === RawCData ? CData : + x === RawDeclaration ? Declaration : + x === RawDTD ? DTD : + x === RawProcessingInstruction ? ProcessingInstruction : + x === RawDocument ? Document : nothing -#struct XMLSpaceContext -# preserve_space::Vector{Bool} # Stack to track xml:space state -#end -#XMLSpaceContext() = XMLSpaceContext([false]) # Default is not preserving - #-----------------------------------------------------------------------------# Raw """ Raw(filename::String) @@ -69,31 +64,54 @@ struct Raw pos::Int len::Int data::Vector{UInt8} - ctx::Vector{Bool} # Context for xml:space (Vector so mutable) + ctx::Vector{Bool} # Context for xml:space (Vector to support inheritance of context) + has_xml_space::Bool # Whether data contains `xml:space` attribute at least once +end +function Raw(data::Vector{UInt8})#, ctx::Vector{Bool}=Bool[false]) + needle = Vector{UInt8}("xml:space") + has_xml_space = findfirst(needle, data) !== nothing + return Raw(RawDocument, 0, 0, 0, data, [false], has_xml_space) +end +function Raw(data::Vector{UInt8}, has_xml_space::Bool, ctx::Vector{Bool}=Bool[false]) + return Raw(RawDocument, 0, 0, 0, data, ctx, has_xml_space) end -Raw(data::Vector{UInt8}, ctx=[false]) = Raw(RawDocument, 0, 0, 0, data, ctx) +const _RAW_INDEX = WeakKeyDict{Vector{UInt8}, Any}() + +struct _TokRec + type::RawType + depth::Int + pos::Int + len::Int + ctx::Vector{Bool} +end + +mutable struct _Index + recs::Vector{_TokRec} + last_raw::Raw + built_end::Int +end Base.read(filename::String, ::Type{Raw}) = isfile(filename) ? - Raw(Mmap.mmap(filename)) : - error("File \"$filename\" does not exist.") + Raw(Mmap.mmap(filename)) : + error("File \"$filename\" does not exist.") Base.read(io::IO, ::Type{Raw}) = Raw(read(io)) Base.parse(x::AbstractString, ::Type{Raw}) = Raw(Vector{UInt8}(x)) # Mostly for debugging -Base.peek(o::Raw, n::Int) = String(@view(o.data[o.pos + o.len + 1:min(end, o.pos + o.len + n + 1)])) +Base.peek(o::Raw, n::Int) = String(view(o.data[o.pos+o.len+1:min(end, o.pos + o.len + n + 1)])) function Base.show(io::IO, o::Raw) print(io, o.type, ':', o.depth, " (pos=", o.pos, ", len=", o.len, ")") o.len > 0 && printstyled(io, ": ", String(o); color=:light_green) end function Base.:(==)(a::Raw, b::Raw) - a.type == b.type && a.depth == b.depth && a.pos == b.pos && a.len == b.len && a.data === b.data + a.type == b.type && a.depth == b.depth && a.pos == b.pos && a.len == b.len && a.data === b.data && a.ctx == b.ctx && a.has_xml_space == b.has_xml_space end -Base.view(o::Raw) = view(o.data, o.pos:o.pos + o.len) +Base.view(o::Raw) = view(o.data, o.pos:o.pos+o.len) Base.String(o::Raw) = String(view(o)) Base.IteratorSize(::Type{Raw}) = Base.SizeUnknown() @@ -125,9 +143,10 @@ end function get_attributes(data, i, j) i = name_start(data, i) (isnothing(j) || isnothing(i) || i > j) && return nothing - out = OrderedDict{String, String}() + out = OrderedDict{String,String}() while !isnothing(i) && i < j key, i = get_name(data, i) + #haskey(out, key) && error("Duplicate attribute name found: $key") # would this be useful? # get quotechar the value is wrapped in (either ' or ") i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1) quotechar = data[i] @@ -139,6 +158,74 @@ function get_attributes(data, i, j) return out end +# ----------------------------------------------------------------------------# Utilities supporting prev +function _get_or_init_index(o::Raw) + idx = get(_RAW_INDEX, o.data, nothing) + if idx === nothing + start = Raw(o.data) # fresh RawDocument + _RAW_INDEX[o.data] = _Index(_TokRec[], start, 0) + idx = _RAW_INDEX[o.data] + end + return idx +end +function _ensure_index_upto!(o::Raw, target_pos::Int) + idx = _get_or_init_index(o) + r = idx.last_raw + while true + n = next(r) + if n === nothing + idx.built_end = typemax(Int) + idx.last_raw = r + return idx + end + push!(idx.recs, _TokRec(n.type, n.depth, n.pos, n.len, copy(n.ctx))) + endpos = n.pos + n.len + idx.built_end = endpos + idx.last_raw = n + r = n + if endpos >= target_pos + return idx + end + end +end +function _find_prev_token(recs::Vector{_TokRec}, p::Int) + lo, hi = 1, length(recs) + ans = 0 + while lo <= hi + mid = (lo + hi) >>> 1 + endpos = recs[mid].pos + recs[mid].len + if endpos < p + 1 + ans = mid + lo = mid + 1 + else + hi = mid - 1 + end + end + return ans == 0 ? nothing : recs[ans] +end + +#-----------------------------------------------------------------------------# update xml:space context +# check attributes for xml:space and update ctx if necessary +function get_ctx(o) + att = attributes(o) + if !isnothing(att) && haskey(att, "xml:space") + if att["xml:space"] == "preserve" + return true + elseif att["xml:space"] == "default" + return false + else + error("Invalid value for xml:space attribute: $(att["xml:space"]). Must be 'preserve' or 'default'.") + end + end + return nothing +end +function update_ctx!(ctx, o) + new_ctx = get_ctx(o) + if new_ctx !== nothing + ctx[end] = new_ctx + end + return nothing +end #-----------------------------------------------------------------------------# interface """ @@ -168,18 +255,7 @@ function attributes(o::Raw) i = o.pos i = name_start(o.data, i) i = name_stop(o.data, i) - out=get_attributes(o.data, i + 1, o.pos + o.len) - if o.type === RawElementOpen && !isnothing(out) && haskey(out, "xml:space") - # If xml:space attribute is present, we need to preserve whitespace - if out["xml:space"] == "preserve" - o.ctx[1]= true - elseif out["xml:space"] == "default" - o.ctx[1] = false - else - error("Invalid value for xml:space attribute: $(out["xml:space"]). Must be 'preserve' or 'default'.") - end - end - out + get_attributes(o.data, i + 1, o.pos + o.len) elseif o.type === RawDeclaration get_attributes(o.data, o.pos + 6, o.pos + o.len) else @@ -196,11 +272,11 @@ function value(o::Raw) if o.type === RawText String(o) elseif o.type === RawCData - String(view(o.data, o.pos + length(" 1 ? Char(o.data[i-1]) : Char('<') + c = Char(o.data[i]) + d = Char(o.data[k+1]) + if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/' type = RawText j = findnext(==(UInt8('<')), data, i) - 1 - j = (ctx[1]) ? j : findprev(!isspace, data, j) # preserving whitespace if needed + j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed + if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument + # Maybe drop pure-whitespace inter-element text nodes? + # (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node) + #if all(isspace, @view data[i:j]) && depth > 1 + # return next(Raw(type, depth, j, 0, data, ctx, has_xml_space)) + #end + end else - i=k - j=k+1 + i = k + j = k + 1 if c === '<' - c2 = Char(o.data[i + 1]) + c2 = Char(o.data[i+1]) if c2 === '!' - c3 = Char(o.data[i + 2]) + c3 = Char(o.data[i+2]) if c3 === '-' type = RawComment j = findnext(Vector{UInt8}("-->"), data, i)[end] @@ -301,7 +393,7 @@ function next(o::Raw) elseif c3 === 'D' || c3 == 'd' type = RawDTD j = findnext(==(UInt8('>')), data, i) - while sum(==(UInt8('>')), data[k:j]) != sum(==(UInt8('<')), data[i:j]) + while sum(==(UInt8('>')), @view data[k:j]) != sum(==(UInt8('<')), @view data[i:j]) j = findnext(==(UInt8('>')), data, j + 1) end end @@ -315,18 +407,79 @@ function next(o::Raw) elseif c2 === '/' type = RawElementClose depth -= 1 + pop!(ctx) # revert to parent xml:space context j = findnext(==(UInt8('>')), data, i) else j = findnext(==(UInt8('>')), data, i) if data[j-1] === UInt8('/') type = RawElementSelfClosed else - type = RawElementOpen + type = RawElementOpen + end + end + end + end + return Raw(type, depth, i, j - i, data, ctx, has_xml_space) +end +# +function next_no_xml_space(o::Raw) # same as v0.3.5 + i = o.pos + o.len + 1 + depth = o.depth + data = o.data + type = o.type + has_xml_space = o.has_xml_space + ctx = [false] + i = findnext(!isspace, data, i) + if isnothing(i) + return nothing + end + if type === RawElementOpen || type === RawDocument + depth += 1 + end + c = Char(o.data[i]) + d = Char(o.data[i+1]) + if c !== '<' + type = RawText + j = findnext(==(UInt8('<')), data, i) - 1 + j = findprev(!isspace, data, j) # "rstrip" + elseif c === '<' + c2 = Char(o.data[i+1]) + if c2 === '!' + c3 = Char(o.data[i+2]) + if c3 === '-' + type = RawComment + j = findnext(Vector{UInt8}("-->"), data, i)[end] + elseif c3 === '[' + type = RawCData + j = findnext(Vector{UInt8}("]]>"), data, i)[end] + elseif c3 === 'D' || c3 == 'd' + type = RawDTD + j = findnext(==(UInt8('>')), data, i) + while sum(==(UInt8('>')), @view data[i:j]) != sum(==(UInt8('<')), @view data[i:j]) + j = findnext(==(UInt8('>')), data, j + 1) end end + elseif c2 === '?' + if get_name(data, i + 2)[1] == "xml" + type = RawDeclaration + else + type = RawProcessingInstruction + end + j = findnext(Vector{UInt8}("?>"), data, i)[end] + elseif c2 === '/' + type = RawElementClose + depth -= 1 + j = findnext(==(UInt8('>')), data, i) + else + j = findnext(==(UInt8('>')), data, i) + if data[j-1] === UInt8('/') + type = RawElementSelfClosed + else + type = RawElementOpen + end end end - return Raw(type, depth, i, j - i, data, ctx) + return Raw(type, depth, i, j - i, data, ctx, has_xml_space) end #-----------------------------------------------------------------------------# prev Raw @@ -336,64 +489,76 @@ end Return the previous node in the document during depth-first traversal. Not defined for `XML.Node`. """ function prev(o::Raw) + if o.has_xml_space # using xml:space context at least once in data + return prev_xml_space(o) + else # not using xml:space context at all (same as v0.3.5) + return prev_no_xml_space(o) + end +end + +function prev_xml_space(o::Raw) + o.type === RawDocument && return nothing + + idx = _ensure_index_upto!(o, o.pos - 1) + rec = _find_prev_token(idx.recs, o.pos - 1) + if rec === nothing + return Raw(o.data, o.has_xml_space, copy(o.ctx)) + end + return Raw(rec.type, rec.depth, rec.pos, rec.len, o.data, copy(rec.ctx), o.has_xml_space) +end +function prev_no_xml_space(o::Raw) # same as v0.3.5 depth = o.depth data = o.data type = o.type - ctx = o.ctx + has_xml_space = o.has_xml_space + ctx = has_xml_space ? copy(o.ctx) : [false] type === RawDocument && return nothing j = o.pos - 1 - k = findprev(!isspace, data, j) - if isnothing(k) || length(String(o.data[o.pos + o.len + 1:end]))==0 - return Raw(data, ctx) # RawDocument + j = findprev(!isspace, data, j) + if isnothing(j) + return Raw(data, has_xml_space, ctx) # RawDocument end - j = (ctx[1]) ? j : k c = Char(o.data[j]) - d = Char(data[findprev(==(UInt8('<')), data, j)+1]) - i = j - 1 next_type = type - if c !== '>' || type === RawElementClose && d !== '/' && (ctx[1]) # text or empty whitespace + if c !== '>' # text type = RawText - i=findprev(==(UInt8('>')), data, j) + 1 - i = (ctx[1]) ? i : findprev(!isspace, data, i) # If preserving whitespace, retain leading and trailing whitespace - else - j=k - i=k-1 - if c === '>' - c2 = Char(o.data[j - 1]) - if c2 === '-' - type = RawComment - i = findprev(Vector{UInt8}("<--"), data, j)[1] - elseif c2 === ']' - type = RawCData - i = findprev(Vector{UInt8}("')), data, j) + 1 + i = findnext(!isspace, data, i) # "lstrip" + elseif c === '>' + c2 = Char(o.data[j-1]) + if c2 === '-' + type = RawComment + i = findprev(Vector{UInt8}("<--"), data, j)[1] + elseif c2 === ']' + type = RawCData + i = findprev(Vector{UInt8}(".") - end - end + type = RawProcessingInstruction + end else - error("Unreachable reached in XML.prev") + i = findprev(==(UInt8('<')), data, j) + char = Char(data[i+1]) + if char === '/' + type = RawElementClose + elseif char === '!' + type = DTD + elseif isletter(char) || char === '_' + type = Char(o.data[j-2]) === '/' ? RawElementSelfClosed : RawElementOpen + else + error("Should be unreachable. Unexpected data: <$char ... $c3$c2$c1>.") + end end + else + error("Unreachable reached in XML.prev") end if type !== RawElementOpen && next_type === RawElementClose depth += 1 - elseif type == RawElementOpen && next_type !== RawElementClose + elseif type === RawElementOpen && next_type !== RawElementClose depth -= 1 end - return Raw(type, depth, i, j - i, data, ctx) + return Raw(type, depth, i, j - i, data, ctx, has_xml_space) end diff --git a/test/runtests.jl b/test/runtests.jl index 2418c54..f329c4a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -116,7 +116,7 @@ end @test String(doc[end]) == "" @testset "next and prev" begin - @test XML.prev(doc[1]) === data + @test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx @test prev(data) === nothing @test XML.next(doc[end]) === nothing @@ -132,6 +132,76 @@ end for (a,b) in zip(next_res, prev_res) @test a == b end + + lzxml = """ hello hello preserve """ + lz = XML.parse(XML.LazyNode, lzxml) + n=XML.next(lz) + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "hello" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "hello" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "\n hello \n preserve \n" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "hello" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "\n hello \n preserve \n" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == " hello " + n=XML.next(n) + text_content = XML.write(n) + @test text_content == " preserve " + n=XML.next(n) + text_content = XML.write(n) + @test text_content == " preserve " + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == "" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == " preserve " + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == " preserve " + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == " hello " + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "\n hello \n preserve \n" + n=XML.next(n) + text_content = XML.write(n) + @test text_content == " hello " + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "\n hello \n preserve \n" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "hello" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "hello" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "" + n=XML.prev(n) + text_content = XML.write(n) + @test text_content == "\n \n hello\n \n hello \n preserve \n \n \n \n" end @testset "depth and parent" begin @@ -190,11 +260,15 @@ end text_content = XML.value(doc2[1][1][1]) @test text_content == " leading and trailing spaces " - # Test 3: Without xml:space, entirely empty whitespace should create a self closing node - xml3 = """ """ + # Test 3: Entirely empty tags with and without xml:space="preserve" become self-closing + xml3 = """ """ doc3 = XML.parse(XML.Node, xml3) text_content = XML.write(doc3[1][1]) - @test text_content == "" + @test text_content == "" # without xml:space="preserve", empty text becomes self-closing + text_content = XML.value(doc3[1][2][1]) + @test text_content == " " # with xml:space, whitespace is preserved + text_content = XML.write(doc3[1][3]) + @test text_content == "" # with xml:space="preserve", empty text becomes self-closing # Test 4: Without xml:space, whitespace should be normalized xml4 = """ gets normalized """ @@ -218,15 +292,15 @@ end """ doc6 = XML.parse(XML.Node, xml6) # Both parent and child should preserve whitespace - @test contains(XML.value(doc6[1][1][1]), "parent text \n") - @test XML.value(doc6[1][1][2][1]) == " child text " + @test contains(XML.value(doc6[1][2][1]), "parent text \n") + @test XML.value(doc6[1][2][2][1]) == " child text " # Test 7: xml:space="default" overrides parent's "preserve" xml7 = """ normalized despite parent """ doc7 = XML.parse(XML.Node, xml7) - @test XML.value(doc7[1][1][1]) == "normalized despite parent" + @test XML.value(doc7[1][2][1]) == "normalized despite parent" end @testset "Nesting scenarios" begin @@ -241,15 +315,15 @@ end doc8 = XML.parse(XML.Node, xml8) # level1 should preserve (inherits from root) - level1_text = XML.value(doc8[1][1][1]) + level1_text = XML.value(doc8[1][2][1]) @test level1_text == " preserved \n " # level2 should normalize (explicit xml:space="default") - level2_text = XML.value(doc8[1][1][2][1]) + level2_text = XML.value(doc8[1][2][2][1]) @test level2_text == "normalized" # level3 should preserve (explicit xml:space="preserve") - level3_text = XML.value(doc8[1][1][2][2][1]) + level3_text = XML.value(doc8[1][2][2][2][1]) @test level3_text == " preserved again " # Test 9: repeated multiple levels of xml:space changes @@ -268,18 +342,239 @@ end doc9 = XML.parse(XML.Node, xml9) # level1b should preserve (inherits from root) - level1b_text = XML.value(doc9[1][2][1]) + level1b_text = XML.value(doc9[1][4][1]) @test level1b_text == " preserved b \n " # level2 should normalize (explicit xml:space="default") - level2b_text = XML.value(doc9[1][2][2][1]) + level2b_text = XML.value(doc9[1][4][2][1]) @test level2b_text == "normalized b" # level3 should preserve (explicit xml:space="preserve") - level3b_text = XML.value(doc9[1][2][2][2][1]) + level3b_text = XML.value(doc9[1][4][2][2][1]) @test level3b_text == " preserved again b " + # Test 10: futher repeated multiple levels of xml:space changes + xml10 = """ + normalized + normalized b + preserved + + + normalized c + preserved b + normalized again b + preserved c + + + + normalized d + """ + doc10 = XML.parse(XML.Node, xml10) + + # level1 should normalize (as root) + level1_text = XML.value(doc10[end][1][1]) + @test level1_text == "normalized" + + # level2 should normalize (as root and level1) + level2_text = XML.value(doc10[end][1][2][1]) + @test level2_text == "normalized b" + + # level3 should preserve (explicit xml:space="preserve") + level3_text = XML.value(doc10[end][1][2][2][1]) + @test level3_text == " preserved " + + # level1b should normalize (as root) + level1b_text = XML.value(doc10[end][2][1]) + @test level1b_text == "normalized c" + + # level2b should preserve (explicit xml:space="preserve") + level2b_text = XML.value(doc10[end][2][2][1]) + @test level2b_text == " preserved b \n " + + # level3 should normalize (explicit xml:space="default") + level3b_text = XML.value(doc10[end][2][2][2][1]) + @test level3b_text == "normalized again b" + + # level3c should preserve (inherited from level2b) + level3c_text = XML.value(doc10[end][2][2][4][1]) + @test level3c_text == " preserved c \n " + + # level1c should normalize (as root) + level1c_text = XML.value(doc10[end][3][1]) + @test level1c_text == "normalized d" + end + @testset "inter-element gap semantics" begin + # Default parent: gap between siblings should be dropped + s1 = """ x + y """ + d1 = XML.parse(XML.Node, s1) + @test length(d1[1]) == 2 + @test XML.value(d1[1][1][1]) == "x" + @test XML.value(d1[1][2][1]) == "y" + + # Preserve parent, default child ends: gap after default child dropped + s2 = """ +

keep

+ norm + after default gap +
""" + d2 = XML.parse(XML.Node, s2) + @test length(d2[1]) == 7 + @test XML.value(d2[1][1]) == "\n " + @test XML.value(d2[1][2][1]) == " keep " + @test XML.value(d2[1][3]) == "\n " + @test XML.value(d2[1][4][1]) == "norm" + @test XML.value(d2[1][5]) == "\n " + @test XML.value(d2[1][6][1]) == " after default gap " + @test XML.value(d2[1][7]) == "\n" end + +# @testset "XML whitespace vs Unicode whitespace" begin +# nbsp = "\u00A0" +# s = """ +# x\t\n +# $(nbsp) y $(nbsp) +# $(nbsp) z $(nbsp) +# """ +# d = XML.parse(XML.Node, s) +# @test XML.value(d[1][1][1]) == "x" +# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" +# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" +# end + + @testset "CDATA/Comment/PI boundaries" begin + s = """ + pre post + pre post + + """ + d = XML.parse(XML.Node, s) + @test XML.value(d[1][1][1]) == "pre" + @test nodetype(d[1][1][2]) == XML.CData + @test XML.value(d[1][1][3]) == "post" + @test XML.value(d[1][2][1]) == " pre " + @test nodetype(d[1][2][2]) == XML.Comment + @test XML.value(d[1][2][3]) == " post " + @test nodetype(d[1][3]) == XML.ProcessingInstruction + end + + @testset "nested toggles and sibling sequences" begin + s = """ + a + b + c + + d + e + + """ + d = XML.parse(XML.Node, s) + @test XML.value(d[1][2][1]) == " a \n " + @test XML.value(d[1][2][2][1]) == "b" + @test XML.value(d[1][2][2][2][1]) == " c " + @test d[1][2][4].tag == "y2" + @test XML.value(d[1][2][4][1]) == "d" + @test d[1][2][6].tag == "w" + @test XML.value(d[1][2][6][1]) == " e " + end + + @testset "root/document boundaries" begin + s = "\n \n a \n \t " + d = XML.parse(XML.Node, s) + @test length(d) == 1 + @test XML.value(d[1][1]) == "a" + end + +# @testset "entities expanding to whitespace" begin +# s = """ +# a +# b +#  c  +# """ +# d = XML.parse(XML.Node, s) +# @test XML.value(d[1][1][1]) == "a" +# @test XML.value(d[1][2][1]) == " b \n" +# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0" +# end + + @testset "invalid values and placement" begin + s_bad = """ t """ + @test_throws ErrorException XML.parse(XML.Node, s_bad) + + s_pi = """ t """ + d = XML.parse(XML.Node, s_pi) + @test XML.value(d[end][1]) == "t" + + s_dup = """ t """ +# @test_throws ErrorException XML.parse(XML.Node, s_dup) + end + + @testset "prev()/next() symmetry" begin + xml = """ + a b c + d e f + i + """ + r = XML.parse(XML.LazyNode, xml).raw + toks=XML.Raw[] + while true + n = XML.next(r) + n === nothing && break + push!(toks, n) + r=n + end + back = XML.Raw[] + r = toks[end] + while true + p = XML.prev(r) + p === nothing && break + push!(back, p) + r = p + end + @test reverse(back)[2:end] == toks[1:end-1] + end + +# @testset "write/read roundtrip extremes" begin + # XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible. +# xml = """ +#

+# +# r +# pre post +#
""" +# n = XML.parse(XML.Node, xml) +# io = IOBuffer(); XML.write(io, n) +# n2 = XML.parse(XML.Node, String(take!(io))) +# @test n == n2 +# @test XML.write(n2[1][1]) == "

" +# @test XML.write(n2[1][2]) == "" +# @test XML.value(n2[1][3][1]) == "r" +# @test XML.write(n2[1][4]) == " pre post " +# end + + @testset "self-closing/empty/whitespace-only children" begin + s = """ + + + + + x y + """ + d = XML.parse(XML.Node, s) + @test XML.write(d[1][1]) == "" + @test XML.write(d[1][2]) == "" + @test XML.value(d[1][3][1]) == " " + @test XML.value(d[1][5][1]) == "x" + @test XML.value(d[1][5][3]) == "y" + end + + @testset "allocation guard: small xml:space doc" begin + xml = " x y " + f() = XML.parse(XML.Node, xml) + a = @allocated f() + @test a < 500_000 # tune for CI + end + end #-----------------------------------------------------------------------------# roundtrip