@@ -5,7 +5,7 @@ using Mmap
5
5
using Tables
6
6
import AbstractTrees: AbstractTrees, children, parent
7
7
8
- export Node, RowNode,
8
+ export Node, RowNode, Children,
9
9
children, parent, nodetype, tag, attributes, value, depth, next, prev
10
10
11
11
# -----------------------------------------------------------------------------# escape/unescape
138
138
"""
    get_name(data, i)

Find the next name in `data`, searching from index `i`, and return `(name, j)`:
`name` is the `String` built from the bytes between the index reported by
`_name_start` and the one just before the index `j` reported by `_name_stop`.
"""
function get_name(data, i)
    start = _name_start(data, i)
    stop = _name_stop(data, start)
    # `@views` avoids copying the slice before `String` builds the result.
    name = @views String(data[start:(stop - 1)])
    return name, stop
end
144
144
@@ -155,13 +155,14 @@ function get_attributes(data, i)
155
155
i = findnext (x -> Char (x) === ' "' || Char (x) === ' '' , data, i)
156
156
quotechar = data[i]
157
157
i2 = findnext (== (quotechar), data, i + 1 )
158
- value = String (data[i+ 1 : i2- 1 ])
158
+ @views value = String (data[i+ 1 : i2- 1 ])
159
159
out[key] = value
160
160
i = _name_start (data, i2)
161
161
end
162
162
return out
163
163
end
164
164
165
+
165
166
# -----------------------------------------------------------------------------# interface
166
167
"""
167
168
nodetype(node) --> XML.NodeType
@@ -290,9 +291,7 @@ function next(o::RawData)
290
291
j = findnext (Vector {UInt8} (" ]]>" ), data, i)[end ]
291
292
elseif c3 === ' D'
292
293
type = RAW_DTD
293
- j = findnext (x -> x == UInt8 (' >' ), data, i)
294
- else
295
- error (" Should be unreachable. Unexpected typeen: $c$c2$c3 " )
294
+ j = findnext (== (UInt8 (' >' )), data, i)
296
295
end
297
296
elseif c2 === ' ?'
298
297
if get_name (data, i + 2 )[1 ] == " xml"
@@ -458,6 +457,33 @@ function prev(o::RowNode)
458
457
return RowNode (n)
459
458
end
460
459
460
+ # -----------------------------------------------------------------------------# FastNode
461
"""
    FastNode

Immutable tree node that stores `nodetype`, `tag`, `attributes`, `value`, `children`,
and `depth` directly as fields.  `tag`, `attributes`, `value`, and `children` may each
be `nothing`.
"""
struct FastNode
    nodetype::NodeType
    tag::Union{Nothing, String}
    attributes::Union{Nothing, OrderedDict{String, String}}
    value::Union{Nothing, String}
    children::Union{Nothing, Vector{FastNode}}
    depth::Int
end
469
# Convenience constructors: wrap the input in a `RowNode`, then convert that `RowNode`
# into a `FastNode`.
function FastNode(file::AbstractString)
    return FastNode(RowNode(file))
end

function FastNode(data::RawData)
    return FastNode(RowNode(data))
end
471
+
472
# Display a `FastNode` through the shared `_show_node` printer.
function Base.show(io::IO, node::FastNode)
    return _show_node(io, node)
end
473
+
474
# Recursively convert a `RowNode` (and every descendant) into a `FastNode`.
# A node without children stores `nothing` in its `children` field.
function FastNode(node::RowNode)
    (; nodetype, tag, attributes, value, depth) = node
    kids = children(node)
    subtree = isempty(kids) ? nothing : FastNode[FastNode(kid) for kid in kids]
    return FastNode(nodetype, tag, attributes, value, subtree, depth)
end
479
+
480
# Leaf nodes store `children = nothing`.  `children` is imported from AbstractTrees and
# is used elsewhere in this file with `isempty`/iteration, so leaves report an empty
# tuple rather than `nothing`.
children(o::FastNode) = something(o.children, ())

# Integer indexing addresses the node's children.  On a leaf, `getindex` raises a
# `BoundsError` and `lastindex` is 0.
Base.getindex(o::FastNode, i::Integer) = children(o)[i]
Base.setindex!(o::FastNode, v, i::Integer) = (o.children[i] = v)
Base.lastindex(o::FastNode) = length(children(o))
484
+
485
+
486
+
461
487
# -----------------------------------------------------------------------------# Node
462
488
Base. @kwdef struct Node
463
489
nodetype:: NodeType
@@ -472,15 +498,16 @@ Node(file::AbstractString) = Node(RawData(file))
472
498
# Build a `Node` from raw data by first wrapping it in a `RowNode`.
function Node(data::RawData)
    return Node(RowNode(data))
end
473
499
474
500
# Recursively convert a `RowNode` (and every descendant) into a `Node`.
function Node(node::RowNode)
    (; nodetype, tag, attributes, value, depth) = node
    kids = XML.children(node)
    # No children: `children` is omitted from the keyword call.
    isempty(kids) && return Node(; nodetype, tag, attributes, value, depth)
    return Node(; nodetype, tag, attributes, value, children = map(Node, kids), depth)
end
485
512
486
513
# Parse the string `x` into `RawData`, then convert the result to a `Node`.
function parse(x::AbstractString, ::Type{Node} = Node)
    raw = parse(x, RawData)
    return Node(raw)
end
@@ -496,7 +523,6 @@ function (o::Node)(children...)
496
523
foreach (children) do x
497
524
push! (out, _node (x; depth= o. depth + 1 ))
498
525
end
499
-
500
526
Node (o; children= out)
501
527
end
502
528
@@ -597,12 +623,54 @@ end
597
623
598
624
599
625
626
+ # #-----------------------------------------------------------------------------# Children
627
+ # """
628
+ # Children(node)
629
+
630
+ # Iterator over the children of a node.
631
+ # """
632
+ # struct Children{T}
633
+ # parent::T
634
+ # end
635
+
636
+ # Base.IteratorSize(::Type{Children{T}}) where {T} = Base.SizeUnknown()
637
+ # Base.eltype(::Type{Children{T}}) where {T} = T
638
+
639
+ # function Base.iterate(o::Children{RawData}, state=o.parent)
640
+ # (;type, depth) = o.parent
641
+ # type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT || return nothing
642
+ # n = iterate(state, state)
643
+ # isnothing(n) && return nothing
644
+ # _, state = n
645
+ # state.type === RAW_ELEMENT_CLOSE && return iterate(o, state)
646
+ # state.depth == depth + 1 && return (state, state) # <-- only place we return a value
647
+ # state.depth == depth && return nothing
648
+ # type === RAW_DOCUMENT && state.depth == 2 && return nothing # early stop if we've seen the doc root already
649
+ # return iterate(o, state)
650
+ # end
651
+
652
+ # function Base.iterate(o::Children{RowNode}, state=getfield(o.parent, :data))
653
+ # n = iterate(Children(getfield(o.parent, :data)), state)
654
+ # isnothing(n) && return nothing
655
+ # item, state = n
656
+ # return RowNode(item), state
657
+ # end
658
+
659
+
600
660
601
661
602
662
603
663
# -----------------------------------------------------------------------------# !!! common !!!
604
664
# Everything below here is common to all data structures
605
665
666
+
667
# Generic fallbacks: each accessor reads the like-named property of `o`.
nodetype(o) = getproperty(o, :nodetype)
depth(o) = getproperty(o, :depth)
tag(o) = getproperty(o, :tag)
attributes(o) = getproperty(o, :attributes)
value(o) = getproperty(o, :value)
672
+
673
+
606
674
# -----------------------------------------------------------------------------# nodes_equal
607
675
function nodes_equal (a, b)
608
676
out = XML. tag (a) == XML. tag (b)
0 commit comments