@@ -91,6 +91,9 @@ RawData(filename::String) = RawData(Mmap.mmap(filename))
91
91
92
92
# Build a `RawData` from an in-memory string by first converting the string
# into a byte vector.
function parse(x::AbstractString)
    bytes = Vector{UInt8}(x)
    return RawData(bytes)
end
93
93
94
# Tables.jl interface for `RawData`: the object is returned unchanged as its
# own row source.
function Tables.rows(o::RawData)
    return o
end

# The schema is made of the field names/types of `RawData`, with the last
# field of the struct left out.
function Tables.schema(o::RawData)
    colnames = fieldnames(RawData)
    coltypes = fieldtypes(RawData)
    return Tables.Schema(colnames[1:end-1], coltypes[1:end-1])
end
96
+
94
97
function Base. show (io:: IO , o:: RawData )
95
98
print (io, o. depth, " : " , o. type, " (pos=" , o. pos, " , len=" , o. len, " )" )
96
99
o. len > 0 && printstyled (io, " : " , String (o. data[o. pos: o. pos + o. len]); color= :light_green )
@@ -104,7 +107,6 @@ String(o::RawData) = String(view(o))
104
107
105
108
# Iteration traits for `RawData`: the length is declared unknown and the
# element type is declared to be `RawData`.
function Base.IteratorSize(::Type{RawData})
    return Base.SizeUnknown()
end
function Base.eltype(::Type{RawData})
    return RawData
end
107
- Base. isdone (o:: RawData ) = o. pos + o. len ≥ length (o. data)
108
110
109
111
function Base. iterate (o:: RawData , state= o)
110
112
n = next (state)
@@ -179,6 +181,8 @@ function value(o::RawData)
179
181
String (view (o. data, o. pos + 9 : o. pos + o. len - 3 ))
180
182
elseif o. type === RAW_COMMENT
181
183
String (view (o. data, o. pos + 4 : o. pos + o. len - 3 ))
184
+ elseif o. type === RAW_DTD
185
+ String (view (o. data, o. pos + 2 : o. pos + o. len - 2 ))
182
186
else
183
187
nothing
184
188
end
@@ -195,7 +199,7 @@ function children(o::RawData)
195
199
end
196
200
out
197
201
else
198
- nothing
202
+ RawData[]
199
203
end
200
204
end
201
205
@@ -223,7 +227,12 @@ notspace(x::UInt8) = !isspace(Char(x))
223
227
function next (o:: RawData )
224
228
i = o. pos + o. len + 1
225
229
(; depth, data, type) = o
226
- i = findnext (notspace, data, i) # skip insignificant whitespace
230
+ try
231
+ i = findnext (notspace, data, i) # skip insignificant whitespace
232
+ catch
233
+ @info String (o)
234
+ error ()
235
+ end
227
236
isnothing (i) && return nothing
228
237
if type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT
229
238
depth += 1
@@ -318,57 +327,13 @@ function prev(o::RawData)
318
327
end
319
328
320
329
321
- # -----------------------------------------------------------------------------# Lazy
322
- struct LazyNode
323
- data:: RawData
324
- end
325
- LazyNode (filename:: AbstractString ) = LazyNode (RawData (filename))
326
-
327
-
328
-
329
- # Base.get(o::LazyNode) = RowNode(o.data)
330
-
331
- # function next(o::LazyNode)
332
- # x = next(o.data)
333
- # isnothing(x) ? nothing : LazyNode(x)
334
- # end
335
- # function prev(o::LazyNode)
336
- # x = prev(o.data)
337
- # isnothing(x) ? nothing : LazyNode(x)
338
- # end
339
-
340
- # function Base.show(io::IO, o::LazyNode)
341
- # print(io, "LazyNode: ")
342
- # show(io, get(o))
343
- # end
344
- # function AbstractTrees.children(o::LazyNode)
345
- # depth = o.data.depth
346
- # out = LazyNode[]
347
- # x = o
348
- # while !isnothing(x)
349
- # x = next(x)
350
- # isnothing(x) && break
351
- # x.data.tok === TOK_END_ELEMENT && continue
352
- # x.data.depth == depth && break
353
- # x.data.depth == depth + 1 && push!(out, x)
354
- # end
355
- # return out
356
- # end
357
- # # AbstractTrees.nodevalue(o::LazyNode) = get(o)
358
-
359
- # # function AbstractTrees.parent(o::LazyNode)
360
- # # TODO
361
- # # end
362
-
363
-
364
-
365
330
# -----------------------------------------------------------------------------# RowNode
366
331
# A node paired with the `RawData` it was built from.
# `tag`, `attributes` and `value` may each be `nothing`.
struct RowNode
    nodetype::NodeType
    tag::Union{String, Nothing}
    attributes::Union{OrderedDict{String, String}, Nothing}
    value::Union{String, Nothing}
    data::RawData  # always present (no longer allowed to be `nothing`)
end
373
338
function RowNode (data:: RawData )
374
339
nodetype = _nodetype (data. type)
@@ -377,55 +342,73 @@ function RowNode(data::RawData)
377
342
value = _value (data)
378
343
RowNode (nodetype, tag, attributes, value, data)
379
344
end
345
# Build a `RowNode` from a file name.
# The file-name constructor of `RawData` is declared for `String`, so any other
# `AbstractString` (e.g. a `SubString`) is converted before being passed on;
# for a `String` argument the conversion is a no-op.
RowNode(file::AbstractString) = RowNode(RawData(String(file)))
380
346
381
- # function RowNode(t::RawData)
382
- # (; type, pos, len, depth) = t
383
- # pos === 0 && return RowNode(DOCUMENT, nothing, nothing, nothing, 0)
384
- # data = view(t.data, pos:pos+len)
385
- # @views if type === RAW_TEXT # text
386
- # return RowNode(TEXT, nothing, nothing, unescape(String(data), data))
387
- # elseif type === RAW_COMMENT # <!-- ... -->
388
- # return RowNode(COMMENT, nothing, nothing, String(data[4:end-3]), data)
389
- # elseif type === RAW_CDATA # <![CDATA[...]]>
390
- # return RowNode(CDATA, nothing, nothing, String(data[10:end-3]), data)
391
- # elseif type === RAW_DECLARATION # <?xml attributes... ?>
392
- # rng = 7:length(data) - 2
393
- # attributes = get_attributes(data[rng])
394
- # return RowNode(DECLARATION, nothing, attributes, nothing, data)
395
- # elseif type === RAW_DTD # <!DOCTYPE ...>
396
- # return RowNode(DTD, nothing, nothing, String(data[10:end-1]), data)
397
- # elseif type === RAW_ELEMENT_OPEN # <NAME attributes... >
398
- # tag, i = get_name(data, 2)
399
- # i = findnext(x -> isletter(Char(x)) || x === UInt8('_'), data, i)
400
- # attributes = isnothing(i) ? nothing : get_attributes(data[i:end-1])
401
- # return RowNode(ELEMENT, tag, attributes, nothing, data)
402
- # elseif type === RAW_ELEMENT_CLOSE # </NAME>
403
- # return nothing
404
- # elseif type === RAW_ELEMENT_SELF_CLOSED # <NAME attributes... />
405
- # tag, i = get_name(data, 2)
406
- # i = findnext(x -> isletter(Char(x)) || x === UInt8('_'), data, i)
407
- # attributes = isnothing(i) ? nothing : get_attributes(data[i:end-2])
408
- # return RowNode(ELEMENT, tag, attributes, nothing, data)
409
- # else
410
- # error("Unhandled token: $tok.")
411
- # end
412
- # end
347
# Children of a `RowNode`: the children of its underlying `RawData`, each
# converted to a `RowNode`.
function AbstractTrees.children(o::RowNode)
    raw_children = children(o.data)
    return map(RowNode, raw_children)
end
413
348
414
- # AbstractTrees.children( o::RowNode) = missing
349
# Display a `RowNode` through the shared node printer.
function Base.show(io::IO, o::RowNode)
    return _show_node(io, o)
end
415
350
416
- # Base.show(io::IO, o::RowNode) = _show_node(io, o)
351
# Iteration traits for `RowNode`: the length is declared unknown and the
# element type is declared to be `RowNode`.
function Base.IteratorSize(::Type{RowNode})
    return Base.SizeUnknown()
end
function Base.eltype(::Type{RowNode})
    return RowNode
end
417
353
418
- # Base.IteratorSize(::Type{Rows}) = Base.SizeUnknown()
419
- # Base.eltype(::Type{Rows}) = RowNode
420
- # Base.isdone(o::Rows, pos) = isdone(o.file, pos)
354
# Iterate over the nodes that follow `o`. The iteration state is the `RawData`
# of the most recently visited token; closing-tag tokens are stepped over and
# never yielded.
function Base.iterate(o::RowNode, state = o.data)
    raw = next(state)
    while !isnothing(raw) && raw.type === RAW_ELEMENT_CLOSE
        raw = next(raw)
    end
    return isnothing(raw) ? nothing : (RowNode(raw), raw)
end
421
360
422
- # function Base.iterate(o::Rows, state = init(o.tokens))
423
- # n = next(state)
424
- # isnothing(n) && return nothing
425
- # n.tok === TOK_END_ELEMENT && return iterate(o, n)
426
- # return RowNode(n), n
427
- # end
361
# Return the `RowNode` that follows `o`, or `nothing` when there is none.
#
# Closing-tag tokens are skipped directly on the raw data (the same way
# `Base.iterate` for `RowNode` does), so no `RowNode` is ever constructed for
# a closing tag just to be thrown away.
function next(o::RowNode)
    raw = next(o.data)
    while !isnothing(raw) && raw.type === RAW_ELEMENT_CLOSE
        raw = next(raw)
    end
    return isnothing(raw) ? nothing : RowNode(raw)
end
367
# Return the `RowNode` that precedes `o`, or `nothing` when there is none.
#
# Closing-tag tokens are skipped directly on the raw data, so no `RowNode` is
# ever constructed for a closing tag just to be thrown away.
function prev(o::RowNode)
    raw = prev(o.data)
    while !isnothing(raw) && raw.type === RAW_ELEMENT_CLOSE
        raw = prev(raw)
    end
    return isnothing(raw) ? nothing : RowNode(raw)
end
373
+
374
+ # -----------------------------------------------------------------------------# Rows
375
# Thin wrapper around a `RowNode`. Property access on a `Rows` is redirected by
# a custom `getproperty`, so the wrapped node is read with `getfield(o, :node)`.
struct Rows
    node::RowNode
end
378
# Build a `Rows` from a file name by going through the `RowNode` constructor.
function Rows(file::AbstractString)
    node = RowNode(file)
    return Rows(node)
end
379
+
380
# The public properties of a `Rows`; these match the names handled by
# `Base.getproperty` for `Rows`.
function Base.propertynames(::Rows)
    return (:depth, :nodetype, :tag, :attributes, :value)
end
381
# Property access for `Rows`.
#
# `depth` is read from the wrapped node's raw data; `nodetype`, `tag`,
# `attributes` and `value` are read from the wrapped node itself. Any other
# name throws an `ErrorException` (the message now names the actual type,
# `Rows`).
function Base.getproperty(o::Rows, x::Symbol)
    node = getfield(o, :node)
    x === :depth && return node.data.depth
    x === :nodetype && return node.nodetype
    x === :tag && return node.tag
    x === :attributes && return node.attributes
    x === :value && return node.value
    error("XML.Rows does not have property: $x")
end
428
389
390
# Tables.jl interface for `Rows`: the object is returned unchanged as its own
# row source.
function Tables.rows(o::Rows)
    return o
end

# Fixed schema: one column per public property of `Rows`.
function Tables.schema(o::Rows)
    colnames = (:depth, :nodetype, :tag, :attributes, :value)
    coltypes = (
        Int,
        NodeType,
        Union{Nothing, String},
        Union{Nothing, OrderedDict{String, String}},
        Union{Nothing, String},
    )
    return Tables.Schema(colnames, coltypes)
end
395
+
396
# Iteration traits for `Rows`: the length is declared unknown and the element
# type is declared to be `Rows`.
function Base.IteratorSize(::Type{Rows})
    return Base.SizeUnknown()
end
function Base.eltype(::Type{Rows})
    return Rows
end
398
+
399
# Iterate over the rows that follow `o`. The state is the `RowNode` visited
# last; each step yields that node's successor wrapped in a `Rows`.
function Base.iterate(o::Rows, state = getfield(o, :node))
    following = next(state)
    isnothing(following) && return nothing
    return Rows(following), following
end
403
+
404
# Return the `Rows` wrapping the node after `o`'s node, or `nothing` when
# there is no such node.
function next(o::Rows)
    following = next(getfield(o, :node))
    if isnothing(following)
        return nothing
    end
    return Rows(following)
end
408
# Return the `Rows` wrapping the node before `o`'s node, or `nothing` when
# there is no such node.
function prev(o::Rows)
    preceding = prev(getfield(o, :node))
    if isnothing(preceding)
        return nothing
    end
    return Rows(preceding)
end
429
412
430
413
# #-----------------------------------------------------------------------------# Rows
431
414
# struct Rows
@@ -551,55 +534,37 @@ AbstractTrees.children(o::Node) = isnothing(o.children) ? [] : o.children
551
534
552
535
Base. show (io:: IO , o:: Node ) = _show_node (io, o)
553
536
554
- # #-----------------------------------------------------------------------------# read
555
- # read(filename::AbstractString) = Node(Tokens(filename))
556
- # read(io::IO) = Node(Tokens("__UKNOWN_FILE__", read(io)))
557
-
558
- # Node(filename::String) = Node(Tokens(filename))
559
-
560
- # function Node(t::Tokens)
561
- # doc = Node(; nodetype=DOCUMENT_NODE, children=[])
562
- # stack = [doc]
563
- # for row in Rows(t)
564
- # temp = Node(row)
565
- # node = Node(temp; children = row.nodetype === ELEMENT_NODE ? [] : nothing)
566
- # filter!(x -> x.depth < node.depth, stack)
567
- # push!(stack[end], node)
568
- # push!(stack, node)
569
- # end
570
- # return doc
571
- # end
572
537
573
538
# #-----------------------------------------------------------------------------# printing
574
539
function _show_node (io:: IO , o)
575
- printstyled (io, 2 o . depth, ' : ' , o. nodetype, ' ' ; color= :light_green )
576
- if o. nodetype === TEXT_NODE
577
- printstyled (io, repr (o. value), color = :light_black )
578
- elseif o. nodetype === ELEMENT_NODE
540
+ printstyled (io, o. nodetype, ' ' ; color= :light_green )
541
+ if o. nodetype === TEXT
542
+ printstyled (io, repr (o. value))
543
+ elseif o. nodetype === ELEMENT
579
544
printstyled (io, ' <' , o. tag, color= :light_cyan )
580
545
_print_attrs (io, o)
581
546
printstyled (io, ' >' , color= :light_cyan )
582
547
_print_n_children (io, o)
583
- elseif o. nodetype === DTD_NODE
548
+ elseif o. nodetype === DTD
584
549
printstyled (io, " <!DOCTYPE" , o. tag, color= :light_cyan )
585
550
printstyled (io, o. value, color= :light_black )
586
551
printstyled (io, ' >' , color= :light_cyan )
587
- elseif o. nodetype === DECLARATION_NODE
552
+ elseif o. nodetype === DECLARATION
588
553
printstyled (io, " <?xml" , color= :light_cyan )
589
554
_print_attrs (io, o)
590
555
printstyled (io, ' >' , color= :light_cyan )
591
- elseif o. nodetype === COMMENT_NODE
556
+ elseif o. nodetype === COMMENT
592
557
printstyled (io, " <!--" , color= :light_cyan )
593
558
printstyled (io, o. value, color= :light_black )
594
559
printstyled (io, " -->" , color= :light_cyan )
595
- elseif o. nodetype === CDATA_NODE
560
+ elseif o. nodetype === CDATA
596
561
printstyled (io, " <![CDATA[" , color= :light_cyan )
597
562
printstyled (io, o. value, color= :light_black )
598
563
printstyled (io, " ]]>" , color= :light_cyan )
599
- elseif o. nodetype === DOCUMENT_NODE
564
+ elseif o. nodetype === DOCUMENT
600
565
printstyled (io, " Document" , color= :light_cyan )
601
566
_print_n_children (io, o)
602
- elseif o. nodetype === UNKNOWN_NODE
567
+ elseif o. nodetype === UNKNOWN
603
568
printstyled (io, " Unknown" , color= :light_cyan )
604
569
_print_n_children (io, o)
605
570
else
@@ -608,11 +573,13 @@ function _show_node(io::IO, o)
608
573
end
609
574
610
575
# Print every attribute of `o` as ` key="value"` in a single styled write.
# Returns `false` without printing anything when `o.attributes` is `nothing`.
function _print_attrs(io::IO, o)
    attrs = o.attributes
    isnothing(attrs) && return false
    pieces = [string(' ', key, "=\"", val, '"') for (key, val) in attrs]
    return printstyled(io, pieces...; color=:light_yellow)
end
613
578
# Print a dimmed child-count suffix for `o`: nothing for zero children,
# " (1 child)" for exactly one, " (N children)" otherwise. A `missing` or
# `nothing` child collection counts as zero children.
function _print_n_children(io::IO, o)
    kids = AbstractTrees.children(o)
    nkids = (ismissing(kids) || isnothing(kids)) ? 0 : length(kids)
    label = if nkids == 0
        ""
    elseif nkids == 1
        " (1 child)"
    else
        " ($(nkids) children)"
    end
    return printstyled(io, label, color=:light_black)
end
617
584
618
585
# #-----------------------------------------------------------------------------# write_xml
0 commit comments