@@ -5,7 +5,7 @@ using Mmap
55using Tables
66import AbstractTrees: AbstractTrees, children, parent
77
8- export Node, RowNode,
8+ export Node, RowNode, Children,
99 children, parent, nodetype, tag, attributes, value, depth, next, prev
1010
1111# -----------------------------------------------------------------------------# escape/unescape
# Read a name out of `data`, beginning the search at index `i`.
#
# `_name_start` locates the first index of the name and `_name_stop` the index
# that bounds it; the name is built from the elements `start:stop-1`.
# Returns the tuple `(name, stop)` so the caller can continue from `stop`.
function get_name(data, i)
    start = _name_start(data, i)
    stop = _name_stop(data, start)
    # `@views` avoids materializing the slice before `String` copies it.
    @views bytes = data[start:stop-1]
    name = String(bytes)
    return name, stop
end
144144
@@ -155,13 +155,14 @@ function get_attributes(data, i)
155155 i = findnext (x -> Char (x) === ' "' || Char (x) === ' '' , data, i)
156156 quotechar = data[i]
157157 i2 = findnext (== (quotechar), data, i + 1 )
158- value = String (data[i+ 1 : i2- 1 ])
158+ @views value = String (data[i+ 1 : i2- 1 ])
159159 out[key] = value
160160 i = _name_start (data, i2)
161161 end
162162 return out
163163end
164164
165+
165166# -----------------------------------------------------------------------------# interface
166167"""
167168 nodetype(node) --> XML.NodeType
@@ -290,9 +291,7 @@ function next(o::RawData)
290291 j = findnext (Vector {UInt8} (" ]]>" ), data, i)[end ]
291292 elseif c3 === ' D'
292293 type = RAW_DTD
293- j = findnext (x -> x == UInt8 (' >' ), data, i)
294- else
295- error (" Should be unreachable. Unexpected typeen: $c$c2$c3 " )
294+ j = findnext (== (UInt8 (' >' )), data, i)
296295 end
297296 elseif c2 === ' ?'
298297 if get_name (data, i + 2 )[1 ] == " xml"
@@ -458,6 +457,33 @@ function prev(o::RowNode)
458457 return RowNode (n)
459458end
460459
460+ # -----------------------------------------------------------------------------# FastNode
"""
    FastNode(file::AbstractString)
    FastNode(data::RawData)
    FastNode(node::RowNode)

Immutable XML node holding its `nodetype`, `tag`, `attributes`, `value`, `depth`, and a
concretely typed `Vector{FastNode}` of children (`nothing` when no children are stored).

The `file` and `data` methods are conveniences that convert their argument to a `RowNode`
first and then build the `FastNode` from it.
"""
struct FastNode
    nodetype::NodeType
    tag::Union{Nothing, String}
    attributes::Union{Nothing, OrderedDict{String, String}}
    value::Union{Nothing, String}
    children::Union{Nothing, Vector{FastNode}}
    depth::Int
end
# Go through `RawData` exactly like `Node(file)` does, so both node types share one
# file-loading path instead of depending on a separate `RowNode(::AbstractString)` method.
FastNode(file::AbstractString) = FastNode(RawData(file))
FastNode(data::RawData) = FastNode(RowNode(data))
471+
# Display a `FastNode` by delegating to the shared `_show_node` printer.
function Base.show(io::IO, o::FastNode)
    return _show_node(io, o)
end
473+
# Build a `FastNode` from a `RowNode`, converting every child recursively.
# A node without children stores `nothing` rather than an empty vector.
function FastNode(node::RowNode)
    nodetype = node.nodetype
    tag = node.tag
    attributes = node.attributes
    value = node.value
    depth = node.depth
    kids = children(node)
    subtree = isempty(kids) ? nothing : map(FastNode, kids)
    return FastNode(nodetype, tag, attributes, value, subtree, depth)
end
479+
# Children of a `FastNode`: the stored vector, or `nothing` if none were stored.
function children(o::FastNode)
    return o.children
end

# Integer indexing reads from / writes into the stored children vector.
function Base.getindex(o::FastNode, i::Integer)
    return o.children[i]
end

function Base.setindex!(o::FastNode, v, i::Integer)
    return o.children[i] = v
end

# Enables `o[end]`; uses the length of the stored children vector.
function Base.lastindex(o::FastNode)
    return length(o.children)
end
484+
485+
486+
461487# -----------------------------------------------------------------------------# Node
462488Base. @kwdef struct Node
463489 nodetype:: NodeType
@@ -472,15 +498,16 @@ Node(file::AbstractString) = Node(RawData(file))
472498Node (data:: RawData ) = Node (RowNode (data))
473499
# Build a `Node` from a `RowNode`, converting every child recursively.
# The `children` keyword is passed only when the `RowNode` has at least one child.
function Node(node::RowNode)
    (; nodetype, tag, attributes, value, depth) = node
    kids = XML.children(node)
    # Childless node: leave `children` at the keyword constructor's default.
    isempty(kids) && return Node(; nodetype, tag, attributes, value, depth)
    subnodes = map(Node, kids)
    return Node(; nodetype, tag, attributes, value, children=subnodes, depth)
end
485512
486513parse (x:: AbstractString , :: Type{Node} = Node) = Node (parse (x, RawData))
@@ -496,7 +523,6 @@ function (o::Node)(children...)
496523 foreach (children) do x
497524 push! (out, _node (x; depth= o. depth + 1 ))
498525 end
499-
500526 Node (o; children= out)
501527end
502528
@@ -597,12 +623,54 @@ end
597623
598624
599625
626+ # #-----------------------------------------------------------------------------# Children
627+ # """
628+ # Children(node)
629+
630+ # Iterator over the children of a node.
631+ # """
632+ # struct Children{T}
633+ # parent::T
634+ # end
635+
636+ # Base.IteratorSize(::Type{Children{T}}) where {T} = Base.SizeUnknown()
637+ # Base.eltype(::Type{Children{T}}) where {T} = T
638+
639+ # function Base.iterate(o::Children{RawData}, state=o.parent)
640+ # (;type, depth) = o.parent
641+ # type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT || return nothing
642+ # n = iterate(state, state)
643+ # isnothing(n) && return nothing
644+ # _, state = n
645+ # state.type === RAW_ELEMENT_CLOSE && return iterate(o, state)
646+ # state.depth == depth + 1 && return (state, state) # <-- only place we return a value
647+ # state.depth == depth && return nothing
648+ # type === RAW_DOCUMENT && state.depth == 2 && return nothing # early stop if we've seen the doc root already
649+ # return iterate(o, state)
650+ # end
651+
652+ # function Base.iterate(o::Children{RowNode}, state=getfield(o.parent, :data))
653+ # n = iterate(Children(getfield(o.parent, :data)), state)
654+ # isnothing(n) && return nothing
655+ # item, state = n
656+ # return RowNode(item), state
657+ # end
658+
659+
600660
601661
602662
603663# -----------------------------------------------------------------------------# !!! common !!!
604664# Everything below here is common to all data structures
605665
666+
# Generic fallbacks: each accessor reads the property of the same name from `o`.
function nodetype(o)
    return getproperty(o, :nodetype)
end

function depth(o)
    return getproperty(o, :depth)
end

function tag(o)
    return getproperty(o, :tag)
end

function attributes(o)
    return getproperty(o, :attributes)
end

function value(o)
    return getproperty(o, :value)
end
672+
673+
606674# -----------------------------------------------------------------------------# nodes_equal
607675function nodes_equal (a, b)
608676 out = XML. tag (a) == XML. tag (b)
0 commit comments