Skip to content

Commit 3456483

Browse files
committed
wrap up Rows
1 parent 44c5e5f commit 3456483

File tree

1 file changed

+88
-121
lines changed

1 file changed

+88
-121
lines changed

src/XML.jl

Lines changed: 88 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ RawData(filename::String) = RawData(Mmap.mmap(filename))
9191

9292
parse(x::AbstractString) = RawData(Vector{UInt8}(x))
9393

94+
# Tables.jl interface: a `RawData` is handed back unchanged as its own row source.
function Tables.rows(o::RawData)
    return o
end

# Schema built from the fields of `RawData`, leaving out the last field.
function Tables.schema(o::RawData)
    keep = 1:(fieldcount(RawData) - 1)
    names = fieldnames(RawData)[keep]
    types = fieldtypes(RawData)[keep]
    return Tables.Schema(names, types)
end
96+
9497
function Base.show(io::IO, o::RawData)
9598
print(io, o.depth, ": ", o.type, " (pos=", o.pos, ", len=", o.len, ")")
9699
o.len > 0 && printstyled(io, ": ", String(o.data[o.pos:o.pos + o.len]); color=:light_green)
@@ -104,7 +107,6 @@ String(o::RawData) = String(view(o))
104107

105108
Base.IteratorSize(::Type{RawData}) = Base.SizeUnknown()
106109
Base.eltype(::Type{RawData}) = RawData
107-
Base.isdone(o::RawData) = o.pos + o.len length(o.data)
108110

109111
function Base.iterate(o::RawData, state=o)
110112
n = next(state)
@@ -179,6 +181,8 @@ function value(o::RawData)
179181
String(view(o.data, o.pos + 9 : o.pos + o.len - 3))
180182
elseif o.type === RAW_COMMENT
181183
String(view(o.data, o.pos + 4 : o.pos + o.len - 3))
184+
elseif o.type === RAW_DTD
185+
String(view(o.data, o.pos + 2 : o.pos + o.len - 2))
182186
else
183187
nothing
184188
end
@@ -195,7 +199,7 @@ function children(o::RawData)
195199
end
196200
out
197201
else
198-
nothing
202+
RawData[]
199203
end
200204
end
201205

@@ -223,7 +227,12 @@ notspace(x::UInt8) = !isspace(Char(x))
223227
function next(o::RawData)
224228
i = o.pos + o.len + 1
225229
(; depth, data, type) = o
226-
i = findnext(notspace, data, i) # skip insignificant whitespace
230+
try
231+
i = findnext(notspace, data, i) # skip insignificant whitespace
232+
catch
233+
@info String(o)
234+
error()
235+
end
227236
isnothing(i) && return nothing
228237
if type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT
229238
depth += 1
@@ -318,57 +327,13 @@ function prev(o::RawData)
318327
end
319328

320329

321-
#-----------------------------------------------------------------------------# Lazy
322-
struct LazyNode
323-
data::RawData
324-
end
325-
LazyNode(filename::AbstractString) = LazyNode(RawData(filename))
326-
327-
328-
329-
# Base.get(o::LazyNode) = RowNode(o.data)
330-
331-
# function next(o::LazyNode)
332-
# x = next(o.data)
333-
# isnothing(x) ? nothing : LazyNode(x)
334-
# end
335-
# function prev(o::LazyNode)
336-
# x = prev(o.data)
337-
# isnothing(x) ? nothing : LazyNode(x)
338-
# end
339-
340-
# function Base.show(io::IO, o::LazyNode)
341-
# print(io, "LazyNode: ")
342-
# show(io, get(o))
343-
# end
344-
# function AbstractTrees.children(o::LazyNode)
345-
# depth = o.data.depth
346-
# out = LazyNode[]
347-
# x = o
348-
# while !isnothing(x)
349-
# x = next(x)
350-
# isnothing(x) && break
351-
# x.data.tok === TOK_END_ELEMENT && continue
352-
# x.data.depth == depth && break
353-
# x.data.depth == depth + 1 && push!(out, x)
354-
# end
355-
# return out
356-
# end
357-
# # AbstractTrees.nodevalue(o::LazyNode) = get(o)
358-
359-
# # function AbstractTrees.parent(o::LazyNode)
360-
# # TODO
361-
# # end
362-
363-
364-
365330
#-----------------------------------------------------------------------------# RowNode
366331
struct RowNode
367332
nodetype::NodeType
368333
tag::Union{String, Nothing}
369334
attributes::Union{OrderedDict{String, String}, Nothing}
370335
value::Union{String, Nothing}
371-
data::Union{RawData, Nothing}
336+
data::RawData
372337
end
373338
function RowNode(data::RawData)
374339
nodetype = _nodetype(data.type)
@@ -377,55 +342,73 @@ function RowNode(data::RawData)
377342
value = _value(data)
378343
RowNode(nodetype, tag, attributes, value, data)
379344
end
345+
RowNode(file::AbstractString) = RowNode(RawData(file))
380346

381-
# function RowNode(t::RawData)
382-
# (; type, pos, len, depth) = t
383-
# pos === 0 && return RowNode(DOCUMENT, nothing, nothing, nothing, 0)
384-
# data = view(t.data, pos:pos+len)
385-
# @views if type === RAW_TEXT # text
386-
# return RowNode(TEXT, nothing, nothing, unescape(String(data), data))
387-
# elseif type === RAW_COMMENT # <!-- ... -->
388-
# return RowNode(COMMENT, nothing, nothing, String(data[4:end-3]), data)
389-
# elseif type === RAW_CDATA # <![CDATA[...]]>
390-
# return RowNode(CDATA, nothing, nothing, String(data[10:end-3]), data)
391-
# elseif type === RAW_DECLARATION # <?xml attributes... ?>
392-
# rng = 7:length(data) - 2
393-
# attributes = get_attributes(data[rng])
394-
# return RowNode(DECLARATION, nothing, attributes, nothing, data)
395-
# elseif type === RAW_DTD # <!DOCTYPE ...>
396-
# return RowNode(DTD, nothing, nothing, String(data[10:end-1]), data)
397-
# elseif type === RAW_ELEMENT_OPEN # <NAME attributes... >
398-
# tag, i = get_name(data, 2)
399-
# i = findnext(x -> isletter(Char(x)) || x === UInt8('_'), data, i)
400-
# attributes = isnothing(i) ? nothing : get_attributes(data[i:end-1])
401-
# return RowNode(ELEMENT, tag, attributes, nothing, data)
402-
# elseif type === RAW_ELEMENT_CLOSE # </NAME>
403-
# return nothing
404-
# elseif type === RAW_ELEMENT_SELF_CLOSED # <NAME attributes... />
405-
# tag, i = get_name(data, 2)
406-
# i = findnext(x -> isletter(Char(x)) || x === UInt8('_'), data, i)
407-
# attributes = isnothing(i) ? nothing : get_attributes(data[i:end-2])
408-
# return RowNode(ELEMENT, tag, attributes, nothing, data)
409-
# else
410-
# error("Unhandled token: $tok.")
411-
# end
412-
# end
347+
# Children of a `RowNode`: every child of the wrapped `RawData`, converted to a `RowNode`.
AbstractTrees.children(o::RowNode) = map(RowNode, children(o.data))
413348

414-
# AbstractTrees.children(o::RowNode) = missing
349+
Base.show(io::IO, o::RowNode) = _show_node(io, o)
415350

416-
# Base.show(io::IO, o::RowNode) = _show_node(io, o)
351+
# Iterator traits: the number of nodes is not known up front, and each item is a `RowNode`.
Base.IteratorSize(::Type{RowNode}) = Base.SizeUnknown()
Base.eltype(::Type{RowNode}) = RowNode

# Iteration protocol for `RowNode`. The state is the most recently visited `RawData`
# (initially the node's own data). Closing-tag tokens are stepped over, never yielded.
function Base.iterate(o::RowNode, state = o.data)
    cursor = next(state)
    while !isnothing(cursor) && cursor.type === RAW_ELEMENT_CLOSE
        cursor = next(cursor)
    end
    return isnothing(cursor) ? nothing : (RowNode(cursor), cursor)
end
421360

422-
# function Base.iterate(o::Rows, state = init(o.tokens))
423-
# n = next(state)
424-
# isnothing(n) && return nothing
425-
# n.tok === TOK_END_ELEMENT && return iterate(o, n)
426-
# return RowNode(n), n
427-
# end
361+
# Return the `RowNode` that follows `o`, or `nothing` when the underlying `RawData`
# has no successor. Closing-tag tokens are skipped.
function next(o::RowNode)
    n = next(o.data)
    # Skip closing tags on the raw tokens themselves, as `Base.iterate(::RowNode)` does,
    # instead of building (and immediately discarding) a full `RowNode` for each one.
    while !isnothing(n) && n.type === RAW_ELEMENT_CLOSE
        n = next(n)
    end
    return isnothing(n) ? nothing : RowNode(n)
end
367+
# Return the `RowNode` that precedes `o`, or `nothing` when the underlying `RawData`
# has no predecessor. Closing-tag tokens are skipped.
function prev(o::RowNode)
    n = prev(o.data)
    # Skip closing tags on the raw tokens themselves rather than constructing a
    # throwaway `RowNode` for each closing tag just to step past it.
    while !isnothing(n) && n.type === RAW_ELEMENT_CLOSE
        n = prev(n)
    end
    return isnothing(n) ? nothing : RowNode(n)
end
373+
374+
#-----------------------------------------------------------------------------# Rows
375+
# A table row backed by a single `RowNode`. `getproperty` is overloaded for `Rows`,
# so the wrapped node itself has to be read with `getfield(o, :node)`.
struct Rows
    node::RowNode
end

# Convenience constructor: build the `RowNode` for `file`, then wrap it.
function Rows(file::AbstractString)
    return Rows(RowNode(file))
end

# Names exposed through `getproperty`; the `node` field is not among them.
function Base.propertynames(::Rows)
    return (:depth, :nodetype, :tag, :attributes, :value)
end
381+
# Property access for `Rows`, forwarded to the wrapped `RowNode`:
#   :depth                               -> `depth` of the node's underlying `RawData`
#   :nodetype, :tag, :attributes, :value -> the `RowNode` field of the same name
# Any other name raises an error.
function Base.getproperty(o::Rows, x::Symbol)
    node = getfield(o, :node)  # plain field access; `o.node` would re-enter this method
    x === :depth && return node.data.depth
    x === :nodetype && return node.nodetype
    x === :tag && return node.tag
    x === :attributes && return node.attributes
    x === :value && return node.value
    # The type is named `Rows`; the message used to refer to a non-existent `XML.Row`.
    error("XML.Rows does not have property: $x")
end
428389

390+
# Tables.jl interface: a `Rows` is handed back unchanged as its own row source.
function Tables.rows(o::Rows)
    return o
end

# Fixed schema: one column per property exposed by `getproperty(::Rows, ::Symbol)`.
function Tables.schema(o::Rows)
    names = (:depth, :nodetype, :tag, :attributes, :value)
    types = (
        Int,
        NodeType,
        Union{Nothing, String},
        Union{Nothing, OrderedDict{String, String}},
        Union{Nothing, String},
    )
    return Tables.Schema(names, types)
end
395+
396+
# Iterator traits: the number of rows is not known up front, and each item is a `Rows`.
Base.IteratorSize(::Type{Rows}) = Base.SizeUnknown()
Base.eltype(::Type{Rows}) = Rows

# Iteration protocol for `Rows`. The state is the last visited `RowNode` (initially the
# wrapped node); each step wraps the following `RowNode` in a new `Rows`.
function Base.iterate(o::Rows, state = getfield(o, :node))
    following = next(state)
    isnothing(following) && return nothing
    return Rows(following), following
end
403+
404+
# Step forward: wrap the `RowNode` after the wrapped one, or return `nothing` if there is none.
function next(o::Rows)
    successor = next(getfield(o, :node))
    isnothing(successor) && return nothing
    return Rows(successor)
end
408+
# Step backward: wrap the `RowNode` before the wrapped one, or return `nothing` if there is none.
function prev(o::Rows)
    predecessor = prev(getfield(o, :node))
    isnothing(predecessor) && return nothing
    return Rows(predecessor)
end
429412

430413
# #-----------------------------------------------------------------------------# Rows
431414
# struct Rows
@@ -551,55 +534,37 @@ AbstractTrees.children(o::Node) = isnothing(o.children) ? [] : o.children
551534

552535
Base.show(io::IO, o::Node) = _show_node(io, o)
553536

554-
# #-----------------------------------------------------------------------------# read
555-
# read(filename::AbstractString) = Node(Tokens(filename))
556-
# read(io::IO) = Node(Tokens("__UKNOWN_FILE__", read(io)))
557-
558-
# Node(filename::String) = Node(Tokens(filename))
559-
560-
# function Node(t::Tokens)
561-
# doc = Node(; nodetype=DOCUMENT_NODE, children=[])
562-
# stack = [doc]
563-
# for row in Rows(t)
564-
# temp = Node(row)
565-
# node = Node(temp; children = row.nodetype === ELEMENT_NODE ? [] : nothing)
566-
# filter!(x -> x.depth < node.depth, stack)
567-
# push!(stack[end], node)
568-
# push!(stack, node)
569-
# end
570-
# return doc
571-
# end
572537

573538
# #-----------------------------------------------------------------------------# printing
574539
function _show_node(io::IO, o)
575-
printstyled(io, 2o.depth, ':', o.nodetype, ' '; color=:light_green)
576-
if o.nodetype === TEXT_NODE
577-
printstyled(io, repr(o.value), color=:light_black)
578-
elseif o.nodetype === ELEMENT_NODE
540+
printstyled(io, o.nodetype, ' '; color=:light_green)
541+
if o.nodetype === TEXT
542+
printstyled(io, repr(o.value))
543+
elseif o.nodetype === ELEMENT
579544
printstyled(io, '<', o.tag, color=:light_cyan)
580545
_print_attrs(io, o)
581546
printstyled(io, '>', color=:light_cyan)
582547
_print_n_children(io, o)
583-
elseif o.nodetype === DTD_NODE
548+
elseif o.nodetype === DTD
584549
printstyled(io, "<!DOCTYPE", o.tag, color=:light_cyan)
585550
printstyled(io, o.value, color=:light_black)
586551
printstyled(io, '>', color=:light_cyan)
587-
elseif o.nodetype === DECLARATION_NODE
552+
elseif o.nodetype === DECLARATION
588553
printstyled(io, "<?xml", color=:light_cyan)
589554
_print_attrs(io, o)
590555
printstyled(io, '>', color=:light_cyan)
591-
elseif o.nodetype === COMMENT_NODE
556+
elseif o.nodetype === COMMENT
592557
printstyled(io, "<!--", color=:light_cyan)
593558
printstyled(io, o.value, color=:light_black)
594559
printstyled(io, "-->", color=:light_cyan)
595-
elseif o.nodetype === CDATA_NODE
560+
elseif o.nodetype === CDATA
596561
printstyled(io, "<![CDATA[", color=:light_cyan)
597562
printstyled(io, o.value, color=:light_black)
598563
printstyled(io, "]]>", color=:light_cyan)
599-
elseif o.nodetype === DOCUMENT_NODE
564+
elseif o.nodetype === DOCUMENT
600565
printstyled(io, "Document", color=:light_cyan)
601566
_print_n_children(io, o)
602-
elseif o.nodetype === UNKNOWN_NODE
567+
elseif o.nodetype === UNKNOWN
603568
printstyled(io, "Unknown", color=:light_cyan)
604569
_print_n_children(io, o)
605570
else
@@ -608,11 +573,13 @@ function _show_node(io::IO, o)
608573
end
609574

610575
# Print each attribute of `o` as ` key="value"` in light yellow.
# Returns `false` without printing when `o.attributes` is `nothing`.
function _print_attrs(io::IO, o)
    attrs = o.attributes
    isnothing(attrs) && return false
    return printstyled(io, join(" $k=\"$v\"" for (k, v) in attrs); color=:light_yellow)
end
613578
# Print a child-count suffix for `o` in light black: nothing for zero children (or when
# `children` yields `missing`/`nothing`), " (1 child)" for one, " (N children)" otherwise.
function _print_n_children(io::IO, o)
    kids = AbstractTrees.children(o)
    nkids = (ismissing(kids) || isnothing(kids)) ? 0 : length(kids)
    label = if nkids == 0
        ""
    elseif nkids == 1
        " (1 child)"
    else
        " ($(length(kids)) children)"
    end
    return printstyled(io, label, color=:light_black)
end
617584

618585
# #-----------------------------------------------------------------------------# write_xml

0 commit comments

Comments
 (0)