diff --git a/src/raw.jl b/src/raw.jl
index f0ae362..8b77bba 100644
--- a/src/raw.jl
+++ b/src/raw.jl
@@ -16,23 +16,18 @@
RawDeclaration, RawDTD, RawElementOpen, RawElementClose, RawElementSelfClosed)
@inline nodetype(x::RawType) =
- x === RawElementOpen ? Element :
- x === RawElementClose ? Element :
- x === RawElementSelfClosed ? Element :
- x === RawText ? Text :
- x === RawComment ? Comment :
- x === RawCData ? CData :
- x === RawDeclaration ? Declaration :
- x === RawDTD ? DTD :
- x === RawProcessingInstruction ? ProcessingInstruction :
- x === RawDocument ? Document :
+ x === RawElementOpen ? Element :
+ x === RawElementClose ? Element :
+ x === RawElementSelfClosed ? Element :
+ x === RawText ? Text :
+ x === RawComment ? Comment :
+ x === RawCData ? CData :
+ x === RawDeclaration ? Declaration :
+ x === RawDTD ? DTD :
+ x === RawProcessingInstruction ? ProcessingInstruction :
+ x === RawDocument ? Document :
nothing
-#struct XMLSpaceContext
-# preserve_space::Vector{Bool} # Stack to track xml:space state
-#end
-#XMLSpaceContext() = XMLSpaceContext([false]) # Default is not preserving
-
#-----------------------------------------------------------------------------# Raw
"""
Raw(filename::String)
@@ -69,31 +64,54 @@ struct Raw
pos::Int
len::Int
data::Vector{UInt8}
- ctx::Vector{Bool} # Context for xml:space (Vector so mutable)
+ ctx::Vector{Bool} # Context for xml:space (Vector to support inheritance of context)
+ has_xml_space::Bool # Whether data contains `xml:space` attribute at least once
+end
+# Build the `RawDocument` cursor for `data`. The bytes are scanned once for the literal
+# `xml:space`, and the result is stored in the `has_xml_space` field.
+function Raw(data::Vector{UInt8})
+    found = findfirst(Vector{UInt8}("xml:space"), data)
+    return Raw(RawDocument, 0, 0, 0, data, [false], !isnothing(found))
+end
+# Build a `RawDocument` cursor over `data` from a caller-supplied `has_xml_space` flag and
+# `ctx` stack, without scanning the bytes for `xml:space`.
+Raw(data::Vector{UInt8}, has_xml_space::Bool, ctx::Vector{Bool}=Bool[false]) =
+    Raw(RawDocument, 0, 0, 0, data, ctx, has_xml_space)
-Raw(data::Vector{UInt8}, ctx=[false]) = Raw(RawDocument, 0, 0, 0, data, ctx)
+# One token recorded by the index used for backward traversal: the token's type, depth,
+# start position and length, plus a snapshot of the xml:space stack taken when it was read.
+struct _TokRec
+    type::RawType
+    depth::Int
+    pos::Int
+    len::Int
+    ctx::Vector{Bool}
+end

+# Incrementally built index over one byte buffer.
+#   recs      - tokens discovered so far, in document order
+#   last_raw  - cursor from which forward scanning resumes
+#   built_end - end position of the last indexed token (`typemax(Int)` once input is exhausted)
+mutable struct _Index
+    recs::Vector{_TokRec}
+    last_raw::Raw
+    built_end::Int
+end

+# Cache of token indexes, one per data buffer. It is declared after `_Index` so the value
+# type is concrete; with `Any` every lookup result was untyped and field access on it was
+# dynamically dispatched. A `WeakKeyDict` drops an entry once its key has been collected.
+# NOTE(review): keys are matched with `isequal`, i.e. by content for vectors - confirm
+# that sharing an index between equal-content buffers is intended.
+const _RAW_INDEX = WeakKeyDict{Vector{UInt8},_Index}()
Base.read(filename::String, ::Type{Raw}) = isfile(filename) ?
- Raw(Mmap.mmap(filename)) :
- error("File \"$filename\" does not exist.")
+ Raw(Mmap.mmap(filename)) :
+ error("File \"$filename\" does not exist.")
Base.read(io::IO, ::Type{Raw}) = Raw(read(io))
Base.parse(x::AbstractString, ::Type{Raw}) = Raw(Vector{UInt8}(x))
# Mostly for debugging
-Base.peek(o::Raw, n::Int) = String(@view(o.data[o.pos + o.len + 1:min(end, o.pos + o.len + n + 1)]))
+# Return up to `n + 1` bytes that follow the current token, as a `String`. The macro form
+# `@view(x[r])` is required here: the function form `view(A, inds...)` takes the indices as
+# separate arguments, so `view(x[r])` first copies the slice and then attempts a
+# zero-index view of that copy, which fails for any slice whose length is not 1.
+Base.peek(o::Raw, n::Int) = String(@view(o.data[o.pos+o.len+1:min(end, o.pos + o.len + n + 1)]))
function Base.show(io::IO, o::Raw)
print(io, o.type, ':', o.depth, " (pos=", o.pos, ", len=", o.len, ")")
o.len > 0 && printstyled(io, ": ", String(o); color=:light_green)
end
function Base.:(==)(a::Raw, b::Raw)
- a.type == b.type && a.depth == b.depth && a.pos == b.pos && a.len == b.len && a.data === b.data
+ a.type == b.type && a.depth == b.depth && a.pos == b.pos && a.len == b.len && a.data === b.data && a.ctx == b.ctx && a.has_xml_space == b.has_xml_space
end
-Base.view(o::Raw) = view(o.data, o.pos:o.pos + o.len)
+Base.view(o::Raw) = view(o.data, o.pos:o.pos+o.len)
Base.String(o::Raw) = String(view(o))
Base.IteratorSize(::Type{Raw}) = Base.SizeUnknown()
@@ -125,9 +143,10 @@ end
function get_attributes(data, i, j)
i = name_start(data, i)
(isnothing(j) || isnothing(i) || i > j) && return nothing
- out = OrderedDict{String, String}()
+ out = OrderedDict{String,String}()
while !isnothing(i) && i < j
key, i = get_name(data, i)
+ #haskey(out, key) && error("Duplicate attribute name found: $key") # would this be useful?
# get quotechar the value is wrapped in (either ' or ")
i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1)
quotechar = data[i]
@@ -139,6 +158,74 @@ function get_attributes(data, i, j)
return out
end
+# ----------------------------------------------------------------------------# Utilities supporting prev
+# Return the cached `_Index` for `o.data`. On first use, create an empty index whose scan
+# cursor is a fresh `RawDocument` over the same buffer, and store it in `_RAW_INDEX`.
+function _get_or_init_index(o::Raw)
+    cached = get(_RAW_INDEX, o.data, nothing)
+    isnothing(cached) || return cached
+    fresh = _Index(_TokRec[], Raw(o.data), 0)
+    _RAW_INDEX[o.data] = fresh
+    return fresh
+end
+# Extend the token index cached for `o.data` until it covers byte position `target_pos`
+# (or the input is exhausted), and return that index.
+#
+# Tokens are appended in document order, each with a copy of its xml:space stack, so a
+# later lookup does not observe subsequent mutation of the scanner's `ctx`.
+function _ensure_index_upto!(o::Raw, target_pos::Int)
+    idx = _get_or_init_index(o)
+    # The index already reaches `target_pos` (`built_end` is `typemax(Int)` once the end
+    # of input was seen), so there is nothing to scan. Without this guard every call
+    # tokenized at least one more token even when the answer was already indexed.
+    idx.built_end >= target_pos && return idx
+    cursor = idx.last_raw
+    while true
+        tok = next(cursor)
+        if tok === nothing
+            # End of input: mark the index as complete and keep the last valid cursor.
+            idx.built_end = typemax(Int)
+            idx.last_raw = cursor
+            return idx
+        end
+        push!(idx.recs, _TokRec(tok.type, tok.depth, tok.pos, tok.len, copy(tok.ctx)))
+        idx.built_end = tok.pos + tok.len
+        idx.last_raw = tok
+        cursor = tok
+        idx.built_end >= target_pos && return idx
+    end
+end
+# Binary-search `recs` (ordered by position) for the last record whose end position
+# `pos + len` is strictly below `p + 1`. Returns that record, or `nothing` if none qualifies.
+function _find_prev_token(recs::Vector{_TokRec}, p::Int)
+    left = 1
+    right = length(recs)
+    best = 0
+    while left <= right
+        middle = (left + right) >>> 1
+        rec = recs[middle]
+        if p + 1 > rec.pos + rec.len
+            # Candidate found; a later record may still qualify, so search to the right.
+            best = middle
+            left = middle + 1
+        else
+            right = middle - 1
+        end
+    end
+    best == 0 && return nothing
+    return recs[best]
+end
+
+#-----------------------------------------------------------------------------# update xml:space context
+# check attributes for xml:space and update ctx if necessary
+# Read the `xml:space` attribute of `o`.
+# Returns `true` for "preserve", `false` for "default", and `nothing` when `o` has no
+# attributes or no `xml:space` key. Any other value raises an error.
+function get_ctx(o)
+    attrs = attributes(o)
+    (isnothing(attrs) || !haskey(attrs, "xml:space")) && return nothing
+    setting = attrs["xml:space"]
+    setting == "preserve" && return true
+    setting == "default" && return false
+    error("Invalid value for xml:space attribute: $(setting). Must be 'preserve' or 'default'.")
+end
+# Overwrite the top entry of the `ctx` stack with the `xml:space` setting declared on `o`.
+# `ctx` is left untouched when `o` declares none. Always returns `nothing`.
+function update_ctx!(ctx, o)
+    flag = get_ctx(o)
+    isnothing(flag) || (ctx[end] = flag)
+    return nothing
+end
#-----------------------------------------------------------------------------# interface
"""
@@ -168,18 +255,7 @@ function attributes(o::Raw)
i = o.pos
i = name_start(o.data, i)
i = name_stop(o.data, i)
- out=get_attributes(o.data, i + 1, o.pos + o.len)
- if o.type === RawElementOpen && !isnothing(out) && haskey(out, "xml:space")
- # If xml:space attribute is present, we need to preserve whitespace
- if out["xml:space"] == "preserve"
- o.ctx[1]= true
- elseif out["xml:space"] == "default"
- o.ctx[1] = false
- else
- error("Invalid value for xml:space attribute: $(out["xml:space"]). Must be 'preserve' or 'default'.")
- end
- end
- out
+ get_attributes(o.data, i + 1, o.pos + o.len)
elseif o.type === RawDeclaration
get_attributes(o.data, o.pos + 6, o.pos + o.len)
else
@@ -196,11 +272,11 @@ function value(o::Raw)
if o.type === RawText
String(o)
elseif o.type === RawCData
- String(view(o.data, o.pos + length(" 1 ? Char(o.data[i-1]) : Char('<')
+ c = Char(o.data[i])
+ d = Char(o.data[k+1])
+ if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/'
type = RawText
j = findnext(==(UInt8('<')), data, i) - 1
- j = (ctx[1]) ? j : findprev(!isspace, data, j) # preserving whitespace if needed
+ j = ctx[end] ? j : findprev(!isspace, data, j) # preserving whitespace if needed
+ if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument
+ # Maybe drop pure-whitespace inter-element text nodes?
+ # (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node)
+ #if all(isspace, @view data[i:j]) && depth > 1
+ # return next(Raw(type, depth, j, 0, data, ctx, has_xml_space))
+ #end
+ end
else
- i=k
- j=k+1
+ i = k
+ j = k + 1
if c === '<'
- c2 = Char(o.data[i + 1])
+ c2 = Char(o.data[i+1])
if c2 === '!'
- c3 = Char(o.data[i + 2])
+ c3 = Char(o.data[i+2])
if c3 === '-'
type = RawComment
j = findnext(Vector{UInt8}("-->"), data, i)[end]
@@ -301,7 +393,7 @@ function next(o::Raw)
elseif c3 === 'D' || c3 == 'd'
type = RawDTD
j = findnext(==(UInt8('>')), data, i)
- while sum(==(UInt8('>')), data[k:j]) != sum(==(UInt8('<')), data[i:j])
+ while sum(==(UInt8('>')), @view data[k:j]) != sum(==(UInt8('<')), @view data[i:j])
j = findnext(==(UInt8('>')), data, j + 1)
end
end
@@ -315,18 +407,79 @@ function next(o::Raw)
elseif c2 === '/'
type = RawElementClose
depth -= 1
+ pop!(ctx) # revert to parent xml:space context
j = findnext(==(UInt8('>')), data, i)
else
j = findnext(==(UInt8('>')), data, i)
if data[j-1] === UInt8('/')
type = RawElementSelfClosed
else
- type = RawElementOpen
+ type = RawElementOpen
+ end
+ end
+ end
+ end
+ return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
+end
+#
+function next_no_xml_space(o::Raw) # same as v0.3.5: forward scan to the token after `o`; returns `nothing` when only whitespace remains
+ i = o.pos + o.len + 1 # first byte after the current token
+ depth = o.depth
+ data = o.data
+ type = o.type
+ has_xml_space = o.has_xml_space
+ ctx = [false] # fresh single-entry stack; `o.ctx` is not consulted in this variant
+ i = findnext(!isspace, data, i) # skip leading whitespace
+ if isnothing(i)
+ return nothing
+ end
+ if type === RawElementOpen || type === RawDocument # the token after an opening tag (or the document start) is one level deeper
+ depth += 1
+ end
+ c = Char(o.data[i])
+ d = Char(o.data[i+1]) # NOTE(review): `d` is not read anywhere else in this function, and `i+1` is indexed unconditionally - confirm it can be removed
+ if c !== '<' # anything that does not start with '<' is text
+ type = RawText
+ j = findnext(==(UInt8('<')), data, i) - 1 # text runs up to the byte before the next '<'
+ j = findprev(!isspace, data, j) # "rstrip": drop trailing whitespace
+ elseif c === '<'
+ c2 = Char(o.data[i+1])
+ if c2 === '!'
+ c3 = Char(o.data[i+2])
+ if c3 === '-'
+ type = RawComment
+ j = findnext(Vector{UInt8}("-->"), data, i)[end]
+ elseif c3 === '['
+ type = RawCData
+ j = findnext(Vector{UInt8}("]]>"), data, i)[end]
+ elseif c3 === 'D' || c3 == 'd'
+ type = RawDTD
+ j = findnext(==(UInt8('>')), data, i)
+ while sum(==(UInt8('>')), @view data[i:j]) != sum(==(UInt8('<')), @view data[i:j]) # extend until the '<' and '>' counts in data[i:j] match
+ j = findnext(==(UInt8('>')), data, j + 1)
end
end
+ elseif c2 === '?'
+ if get_name(data, i + 2)[1] == "xml" # a name of exactly "xml" is classified as a declaration, any other as a processing instruction
+ type = RawDeclaration
+ else
+ type = RawProcessingInstruction
+ end
+ j = findnext(Vector{UInt8}("?>"), data, i)[end]
+ elseif c2 === '/'
+ type = RawElementClose
+ depth -= 1 # a closing tag sits at its element's own depth
+ j = findnext(==(UInt8('>')), data, i)
+ else
+ j = findnext(==(UInt8('>')), data, i)
+ if data[j-1] === UInt8('/') # "/>" ends a self-closing element
+ type = RawElementSelfClosed
+ else
+ type = RawElementOpen
+ end
end
end
- return Raw(type, depth, i, j - i, data, ctx)
+ return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end
#-----------------------------------------------------------------------------# prev Raw
@@ -336,64 +489,76 @@ end
Return the previous node in the document during depth-first traversal. Not defined for `XML.Node`.
"""
function prev(o::Raw)
+    # Buffers that mention `xml:space` at least once go through the index-based walk;
+    # all others keep the backward scanner (same as v0.3.5).
+    return o.has_xml_space ? prev_xml_space(o) : prev_no_xml_space(o)
+end
+
+# Previous token for buffers that use `xml:space`. It is looked up in the forward-built
+# token index rather than scanned backwards. Returns `nothing` for a `RawDocument`, and a
+# `RawDocument` cursor when no indexed token ends before `o`.
+function prev_xml_space(o::Raw)
+    o.type === RawDocument && return nothing
+
+    target = o.pos - 1
+    index = _ensure_index_upto!(o, target)
+    hit = _find_prev_token(index.recs, target)
+    return if isnothing(hit)
+        Raw(o.data, o.has_xml_space, copy(o.ctx))
+    else
+        Raw(hit.type, hit.depth, hit.pos, hit.len, o.data, copy(hit.ctx), o.has_xml_space)
+    end
+end
+function prev_no_xml_space(o::Raw) # same as v0.3.5
depth = o.depth
data = o.data
type = o.type
- ctx = o.ctx
+ has_xml_space = o.has_xml_space
+ ctx = has_xml_space ? copy(o.ctx) : [false]
type === RawDocument && return nothing
j = o.pos - 1
- k = findprev(!isspace, data, j)
- if isnothing(k) || length(String(o.data[o.pos + o.len + 1:end]))==0
- return Raw(data, ctx) # RawDocument
+ j = findprev(!isspace, data, j)
+ if isnothing(j)
+ return Raw(data, has_xml_space, ctx) # RawDocument
end
- j = (ctx[1]) ? j : k
c = Char(o.data[j])
- d = Char(data[findprev(==(UInt8('<')), data, j)+1])
- i = j - 1
next_type = type
- if c !== '>' || type === RawElementClose && d !== '/' && (ctx[1]) # text or empty whitespace
+ if c !== '>' # text
type = RawText
- i=findprev(==(UInt8('>')), data, j) + 1
- i = (ctx[1]) ? i : findprev(!isspace, data, i) # If preserving whitespace, retain leading and trailing whitespace
- else
- j=k
- i=k-1
- if c === '>'
- c2 = Char(o.data[j - 1])
- if c2 === '-'
- type = RawComment
- i = findprev(Vector{UInt8}("<--"), data, j)[1]
- elseif c2 === ']'
- type = RawCData
- i = findprev(Vector{UInt8}("')), data, j) + 1
+ i = findnext(!isspace, data, i) # "lstrip"
+ elseif c === '>'
+ c2 = Char(o.data[j-1])
+ if c2 === '-'
+ type = RawComment
+ i = findprev(Vector{UInt8}("<--"), data, j)[1]
+ elseif c2 === ']'
+ type = RawCData
+ i = findprev(Vector{UInt8}(".")
- end
- end
+ type = RawProcessingInstruction
+ end
else
- error("Unreachable reached in XML.prev")
+ i = findprev(==(UInt8('<')), data, j)
+ char = Char(data[i+1])
+ if char === '/'
+ type = RawElementClose
+ elseif char === '!'
+ type = RawDTD # `type` holds a Raw token type here; `DTD` is the node type that `nodetype(RawDTD)` returns
+ elseif isletter(char) || char === '_'
+ type = Char(o.data[j-2]) === '/' ? RawElementSelfClosed : RawElementOpen
+ else
+ error("Should be unreachable. Unexpected data: <$char ... $c3$c2$c1>.")
+ end
end
+ else
+ error("Unreachable reached in XML.prev")
end
if type !== RawElementOpen && next_type === RawElementClose
depth += 1
- elseif type == RawElementOpen && next_type !== RawElementClose
+ elseif type === RawElementOpen && next_type !== RawElementClose
depth -= 1
end
- return Raw(type, depth, i, j - i, data, ctx)
+ return Raw(type, depth, i, j - i, data, ctx, has_xml_space)
end
diff --git a/test/runtests.jl b/test/runtests.jl
index 2418c54..f329c4a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -116,7 +116,7 @@ end
@test String(doc[end]) == ""
@testset "next and prev" begin
- @test XML.prev(doc[1]) === data
+ @test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx
@test prev(data) === nothing
@test XML.next(doc[end]) === nothing
@@ -132,6 +132,76 @@ end
for (a,b) in zip(next_res, prev_res)
@test a == b
end
+
+ lzxml = """ hello hello preserve """
+ lz = XML.parse(XML.LazyNode, lzxml)
+ n=XML.next(lz)
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == ""
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == "hello"
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == "hello"
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == "\n hello \n preserve \n"
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "hello"
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == "\n hello \n preserve \n"
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == " hello "
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == " preserve "
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == " preserve "
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == ""
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == ""
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == ""
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == " preserve "
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == " preserve "
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == " hello "
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "\n hello \n preserve \n"
+ n=XML.next(n)
+ text_content = XML.write(n)
+ @test text_content == " hello "
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "\n hello \n preserve \n"
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "hello"
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "hello"
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == ""
+ n=XML.prev(n)
+ text_content = XML.write(n)
+ @test text_content == "\n \n hello\n \n hello \n preserve \n \n \n \n"
end
@testset "depth and parent" begin
@@ -190,11 +260,15 @@ end
text_content = XML.value(doc2[1][1][1])
@test text_content == " leading and trailing spaces "
- # Test 3: Without xml:space, entirely empty whitespace should create a self closing node
- xml3 = """ """
+ # Test 3: Entirely empty tags with and without xml:space="preserve" become self-closing
+ xml3 = """ """
doc3 = XML.parse(XML.Node, xml3)
text_content = XML.write(doc3[1][1])
- @test text_content == ""
+ @test text_content == "" # without xml:space="preserve", empty text becomes self-closing
+ text_content = XML.value(doc3[1][2][1])
+ @test text_content == " " # with xml:space, whitespace is preserved
+ text_content = XML.write(doc3[1][3])
+ @test text_content == "" # with xml:space="preserve", empty text becomes self-closing
# Test 4: Without xml:space, whitespace should be normalized
xml4 = """ gets normalized """
@@ -218,15 +292,15 @@ end
"""
doc6 = XML.parse(XML.Node, xml6)
# Both parent and child should preserve whitespace
- @test contains(XML.value(doc6[1][1][1]), "parent text \n")
- @test XML.value(doc6[1][1][2][1]) == " child text "
+ @test contains(XML.value(doc6[1][2][1]), "parent text \n")
+ @test XML.value(doc6[1][2][2][1]) == " child text "
# Test 7: xml:space="default" overrides parent's "preserve"
xml7 = """
normalized despite parent
"""
doc7 = XML.parse(XML.Node, xml7)
- @test XML.value(doc7[1][1][1]) == "normalized despite parent"
+ @test XML.value(doc7[1][2][1]) == "normalized despite parent"
end
@testset "Nesting scenarios" begin
@@ -241,15 +315,15 @@ end
doc8 = XML.parse(XML.Node, xml8)
# level1 should preserve (inherits from root)
- level1_text = XML.value(doc8[1][1][1])
+ level1_text = XML.value(doc8[1][2][1])
@test level1_text == " preserved \n "
# level2 should normalize (explicit xml:space="default")
- level2_text = XML.value(doc8[1][1][2][1])
+ level2_text = XML.value(doc8[1][2][2][1])
@test level2_text == "normalized"
# level3 should preserve (explicit xml:space="preserve")
- level3_text = XML.value(doc8[1][1][2][2][1])
+ level3_text = XML.value(doc8[1][2][2][2][1])
@test level3_text == " preserved again "
# Test 9: repeated multiple levels of xml:space changes
@@ -268,18 +342,239 @@ end
doc9 = XML.parse(XML.Node, xml9)
# level1b should preserve (inherits from root)
- level1b_text = XML.value(doc9[1][2][1])
+ level1b_text = XML.value(doc9[1][4][1])
@test level1b_text == " preserved b \n "
# level2 should normalize (explicit xml:space="default")
- level2b_text = XML.value(doc9[1][2][2][1])
+ level2b_text = XML.value(doc9[1][4][2][1])
@test level2b_text == "normalized b"
# level3 should preserve (explicit xml:space="preserve")
- level3b_text = XML.value(doc9[1][2][2][2][1])
+ level3b_text = XML.value(doc9[1][4][2][2][1])
@test level3b_text == " preserved again b "
+ # Test 10: further repeated multiple levels of xml:space changes
+ xml10 = """
+ normalized
+ normalized b
+ preserved
+
+
+ normalized c
+ preserved b
+ normalized again b
+ preserved c
+
+
+
+ normalized d
+ """
+ doc10 = XML.parse(XML.Node, xml10)
+
+ # level1 should normalize (as root)
+ level1_text = XML.value(doc10[end][1][1])
+ @test level1_text == "normalized"
+
+ # level2 should normalize (as root and level1)
+ level2_text = XML.value(doc10[end][1][2][1])
+ @test level2_text == "normalized b"
+
+ # level3 should preserve (explicit xml:space="preserve")
+ level3_text = XML.value(doc10[end][1][2][2][1])
+ @test level3_text == " preserved "
+
+ # level1b should normalize (as root)
+ level1b_text = XML.value(doc10[end][2][1])
+ @test level1b_text == "normalized c"
+
+ # level2b should preserve (explicit xml:space="preserve")
+ level2b_text = XML.value(doc10[end][2][2][1])
+ @test level2b_text == " preserved b \n "
+
+ # level3 should normalize (explicit xml:space="default")
+ level3b_text = XML.value(doc10[end][2][2][2][1])
+ @test level3b_text == "normalized again b"
+
+ # level3c should preserve (inherited from level2b)
+ level3c_text = XML.value(doc10[end][2][2][4][1])
+ @test level3c_text == " preserved c \n "
+
+ # level1c should normalize (as root)
+ level1c_text = XML.value(doc10[end][3][1])
+ @test level1c_text == "normalized d"
+ end
+ @testset "inter-element gap semantics" begin
+ # Default parent: gap between siblings should be dropped
+ s1 = """ x
+ y """
+ d1 = XML.parse(XML.Node, s1)
+ @test length(d1[1]) == 2
+ @test XML.value(d1[1][1][1]) == "x"
+ @test XML.value(d1[1][2][1]) == "y"
+
+ # Preserve parent, default child ends: gap after default child dropped
+ s2 = """
+ keep
+ norm
+ after default gap
+ """
+ d2 = XML.parse(XML.Node, s2)
+ @test length(d2[1]) == 7
+ @test XML.value(d2[1][1]) == "\n "
+ @test XML.value(d2[1][2][1]) == " keep "
+ @test XML.value(d2[1][3]) == "\n "
+ @test XML.value(d2[1][4][1]) == "norm"
+ @test XML.value(d2[1][5]) == "\n "
+ @test XML.value(d2[1][6][1]) == " after default gap "
+ @test XML.value(d2[1][7]) == "\n"
end
+
+# @testset "XML whitespace vs Unicode whitespace" begin
+# nbsp = "\u00A0"
+# s = """
+# x\t\n
+# $(nbsp) y $(nbsp)
+# $(nbsp) z $(nbsp)
+# """
+# d = XML.parse(XML.Node, s)
+# @test XML.value(d[1][1][1]) == "x"
+# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
+# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
+# end
+
+ @testset "CDATA/Comment/PI boundaries" begin
+ s = """
+ pre post
+ pre post
+
+ """
+ d = XML.parse(XML.Node, s)
+ @test XML.value(d[1][1][1]) == "pre"
+ @test nodetype(d[1][1][2]) == XML.CData
+ @test XML.value(d[1][1][3]) == "post"
+ @test XML.value(d[1][2][1]) == " pre "
+ @test nodetype(d[1][2][2]) == XML.Comment
+ @test XML.value(d[1][2][3]) == " post "
+ @test nodetype(d[1][3]) == XML.ProcessingInstruction
+ end
+
+ @testset "nested toggles and sibling sequences" begin
+ s = """
+ a
+ b
+ c
+
+ d
+ e
+
+ """
+ d = XML.parse(XML.Node, s)
+ @test XML.value(d[1][2][1]) == " a \n "
+ @test XML.value(d[1][2][2][1]) == "b"
+ @test XML.value(d[1][2][2][2][1]) == " c "
+ @test d[1][2][4].tag == "y2"
+ @test XML.value(d[1][2][4][1]) == "d"
+ @test d[1][2][6].tag == "w"
+ @test XML.value(d[1][2][6][1]) == " e "
+ end
+
+ @testset "root/document boundaries" begin
+ s = "\n \n a \n \t "
+ d = XML.parse(XML.Node, s)
+ @test length(d) == 1
+ @test XML.value(d[1][1]) == "a"
+ end
+
+# @testset "entities expanding to whitespace" begin
+# s = """
+# a
+# b
+# c
+# """
+# d = XML.parse(XML.Node, s)
+# @test XML.value(d[1][1][1]) == "a"
+# @test XML.value(d[1][2][1]) == " b \n"
+# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0"
+# end
+
+ @testset "invalid values and placement" begin
+ s_bad = """ t """
+ @test_throws ErrorException XML.parse(XML.Node, s_bad)
+
+ s_pi = """ t """
+ d = XML.parse(XML.Node, s_pi)
+ @test XML.value(d[end][1]) == "t"
+
+ s_dup = """ t """
+# @test_throws ErrorException XML.parse(XML.Node, s_dup)
+ end
+
+ @testset "prev()/next() symmetry" begin
+ xml = """
+ a b c
+ d e f
+ i
+ """
+ r = XML.parse(XML.LazyNode, xml).raw
+ toks=XML.Raw[]
+ while true
+ n = XML.next(r)
+ n === nothing && break
+ push!(toks, n)
+ r=n
+ end
+ back = XML.Raw[]
+ r = toks[end]
+ while true
+ p = XML.prev(r)
+ p === nothing && break
+ push!(back, p)
+ r = p
+ end
+ @test reverse(back)[2:end] == toks[1:end-1]
+ end
+
+# @testset "write/read roundtrip extremes" begin
+ # XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible.
+# xml = """
+#
+#
+# r
+# pre post
+# """
+# n = XML.parse(XML.Node, xml)
+# io = IOBuffer(); XML.write(io, n)
+# n2 = XML.parse(XML.Node, String(take!(io)))
+# @test n == n2
+# @test XML.write(n2[1][1]) == "
"
+# @test XML.write(n2[1][2]) == ""
+# @test XML.value(n2[1][3][1]) == "r"
+# @test XML.write(n2[1][4]) == " pre post "
+# end
+
+ @testset "self-closing/empty/whitespace-only children" begin
+ s = """
+
+
+
+
+ x y
+ """
+ d = XML.parse(XML.Node, s)
+ @test XML.write(d[1][1]) == ""
+ @test XML.write(d[1][2]) == ""
+ @test XML.value(d[1][3][1]) == " "
+ @test XML.value(d[1][5][1]) == "x"
+ @test XML.value(d[1][5][3]) == "y"
+ end
+
+ @testset "allocation guard: small xml:space doc" begin
+ xml = " x y "
+ f() = XML.parse(XML.Node, xml)
+ a = @allocated f()
+ @test a < 500_000 # tune for CI
+ end
+
end
#-----------------------------------------------------------------------------# roundtrip