Skip to content

Commit abf802b

Browse files
committed
wip
1 parent ebba9ec commit abf802b

File tree

3 files changed

+84
-15
lines changed

3 files changed

+84
-15
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ cdata("hello > < ' \" I have odd characters")
8484
### `XML.RawData`
8585
- A super lazy data structure that holds the reference `Vector{UInt8}` data along with position/length to read from.
8686

87+
8788
## Reading
8889

8990
```julia

benchmarks/suite.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ using BenchmarkTools
55

66

77
file = download("http://schemas.opengis.net/kml/2.2.0/ogckml22.xsd")
8-
filename = tempname()
98

109
#-----------------------------------------------------------------------------# Read
10+
@info "XML.FastNode" @benchmark XML.FastNode($file)
1111
@info "XML.Node" @benchmark Node($file)
1212
@info "XML.RowNode" @benchmark XML.RowNode($file)
1313
@info "EzXML.readxml" @benchmark EzXML.readxml($file)

src/XML.jl

Lines changed: 82 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ using Mmap
55
using Tables
66
import AbstractTrees: AbstractTrees, children, parent
77

8-
export Node, RowNode,
8+
export Node, RowNode, Children,
99
children, parent, nodetype, tag, attributes, value, depth, next, prev
1010

1111
#-----------------------------------------------------------------------------# escape/unescape
@@ -138,7 +138,7 @@ end
138138
# Read one name out of the byte buffer `data`, starting the search at index `i`.
#
# Returns the tuple `(name, j)`: `name` is a `String` built from the bytes between
# the index reported by `_name_start` and the byte just before `j`, and `j` is the
# index reported by `_name_stop`.
function get_name(data, i)
    first_byte = _name_start(data, i)
    j = _name_stop(data, first_byte)
    # `@views` turns the slice into a view, so no intermediate array is built
    # before `String` makes its own copy of the bytes.
    name = @views String(data[first_byte:j-1])
    return name, j
end
144144

@@ -155,13 +155,14 @@ function get_attributes(data, i)
155155
i = findnext(x -> Char(x) === '"' || Char(x) === ''', data, i)
156156
quotechar = data[i]
157157
i2 = findnext(==(quotechar), data, i + 1)
158-
value = String(data[i+1:i2-1])
158+
@views value = String(data[i+1:i2-1])
159159
out[key] = value
160160
i = _name_start(data, i2)
161161
end
162162
return out
163163
end
164164

165+
165166
#-----------------------------------------------------------------------------# interface
166167
"""
167168
nodetype(node) --> XML.NodeType
@@ -290,9 +291,7 @@ function next(o::RawData)
290291
j = findnext(Vector{UInt8}("]]>"), data, i)[end]
291292
elseif c3 === 'D'
292293
type = RAW_DTD
293-
j = findnext(x -> x == UInt8('>'), data, i)
294-
else
295-
error("Should be unreachable. Unexpected typeen: $c$c2$c3")
294+
j = findnext(==(UInt8('>')), data, i)
296295
end
297296
elseif c2 === '?'
298297
if get_name(data, i + 2)[1] == "xml"
@@ -458,6 +457,33 @@ function prev(o::RowNode)
458457
return RowNode(n)
459458
end
460459

460+
#-----------------------------------------------------------------------------# FastNode
461+
"""
    FastNode

Immutable XML node that stores its own children as a vector of `FastNode`s.

Every field except `nodetype` and `depth` may be `nothing`.
"""
struct FastNode
    # Kind of node, as an `XML.NodeType` value.
    nodetype::NodeType
    # Tag name, if any.
    tag::Union{Nothing, String}
    # Attribute name => value pairs in insertion order, if any.
    attributes::Union{Nothing, OrderedDict{String, String}}
    # Text value, if any.
    value::Union{Nothing, String}
    # Child nodes; `nothing` (rather than an empty vector) when there are none.
    children::Union{Nothing, Vector{FastNode}}
    # Nesting depth copied from the node this one was built from.
    depth::Int
end
469+
# Build a `FastNode` from `file` (presumably a path -- confirm against `RowNode`)
# by first constructing a `RowNode` and converting that.
function FastNode(file::AbstractString)
    return FastNode(RowNode(file))
end

# Build a `FastNode` from `RawData` by first constructing a `RowNode`.
function FastNode(data::RawData)
    return FastNode(RowNode(data))
end

# Printing is delegated to `_show_node`.
function Base.show(io::IO, o::FastNode)
    return _show_node(io, o)
end
473+
474+
# Convert a `RowNode` (and, recursively, all of its children) into a `FastNode`.
#
# A node without children gets `nothing` in its `children` field instead of an
# empty vector.
function FastNode(node::RowNode)
    kind = node.nodetype
    name = node.tag
    attrs = node.attributes
    text = node.value
    level = node.depth
    kids = children(node)
    subtree = if isempty(kids)
        nothing
    else
        map(FastNode, kids)
    end
    return FastNode(kind, name, attrs, text, subtree, level)
end
479+
480+
# Child access for `FastNode`. All four methods operate directly on the
# `children` field, which is `nothing` for a node without children.
# NOTE(review): with `children === nothing`, indexing and `lastindex` will throw;
# confirm whether callers are expected to check for that first.

# Return the stored children (a `Vector{FastNode}` or `nothing`).
function children(o::FastNode)
    return o.children
end

# `o[i]` is the `i`-th child.
function Base.getindex(o::FastNode, i::Integer)
    return o.children[i]
end

# `o[i] = v` replaces the `i`-th child in the stored vector and evaluates to `v`.
function Base.setindex!(o::FastNode, v, i::Integer)
    return (o.children[i] = v)
end

# `o[end]` resolves to the number of children.
function Base.lastindex(o::FastNode)
    return length(o.children)
end
484+
485+
486+
461487
#-----------------------------------------------------------------------------# Node
462488
Base.@kwdef struct Node
463489
nodetype::NodeType
@@ -472,15 +498,16 @@ Node(file::AbstractString) = Node(RawData(file))
472498
Node(data::RawData) = Node(RowNode(data))
473499

474500
# Convert a `RowNode` (and, recursively, all of its children) into a `Node`.
#
# When the source node has no children the `children` keyword is not passed, so
# the `Node` keyword constructor's own default is used for that field.
function Node(node::RowNode)
    (; nodetype, tag, attributes, value, depth) = node
    # Qualified call, matching the original's use of `XML.children` here.
    kids = XML.children(node)
    isempty(kids) && return Node(; nodetype, tag, attributes, value, depth)
    return Node(; nodetype, tag, attributes, value, children=map(Node, kids), depth)
end
485512

486513
parse(x::AbstractString, ::Type{Node} = Node) = Node(parse(x, RawData))
@@ -496,7 +523,6 @@ function (o::Node)(children...)
496523
foreach(children) do x
497524
push!(out, _node(x; depth=o.depth + 1))
498525
end
499-
500526
Node(o; children=out)
501527
end
502528

@@ -597,12 +623,54 @@ end
597623

598624

599625

626+
# #-----------------------------------------------------------------------------# Children
627+
# """
628+
# Children(node)
629+
630+
# Iterator over the children of a node.
631+
# """
632+
# struct Children{T}
633+
# parent::T
634+
# end
635+
636+
# Base.IteratorSize(::Type{Children{T}}) where {T} = Base.SizeUnknown()
637+
# Base.eltype(::Type{Children{T}}) where {T} = T
638+
639+
# function Base.iterate(o::Children{RawData}, state=o.parent)
640+
# (;type, depth) = o.parent
641+
# type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT || return nothing
642+
# n = iterate(state, state)
643+
# isnothing(n) && return nothing
644+
# _, state = n
645+
# state.type === RAW_ELEMENT_CLOSE && return iterate(o, state)
646+
# state.depth == depth + 1 && return (state, state) # <-- only place we return a value
647+
# state.depth == depth && return nothing
648+
# type === RAW_DOCUMENT && state.depth == 2 && return nothing # early stop if we've seen the doc root already
649+
# return iterate(o, state)
650+
# end
651+
652+
# function Base.iterate(o::Children{RowNode}, state=getfield(o.parent, :data))
653+
# n = iterate(Children(getfield(o.parent, :data)), state)
654+
# isnothing(n) && return nothing
655+
# item, state = n
656+
# return RowNode(item), state
657+
# end
658+
659+
600660

601661

602662

603663
#-----------------------------------------------------------------------------# !!! common !!!
604664
# Everything below here is common to all data structures
605665

666+
667+
# Generic fallbacks: each accessor reads the property of the same name, so any
# object exposing these properties works without defining its own methods.
nodetype(o) = getproperty(o, :nodetype)
depth(o) = getproperty(o, :depth)
tag(o) = getproperty(o, :tag)
attributes(o) = getproperty(o, :attributes)
value(o) = getproperty(o, :value)
672+
673+
606674
#-----------------------------------------------------------------------------# nodes_equal
607675
function nodes_equal(a, b)
608676
out = XML.tag(a) == XML.tag(b)

0 commit comments

Comments
 (0)