@@ -101,6 +101,7 @@ function next(o::TokenData)
101
101
if c2 === ' !'
102
102
c3 = Char (o. data[i + 2 ])
103
103
if c3 === ' -'
104
+ i += 1
104
105
tok = TOK_COMMENT
105
106
j = findnext (Vector {UInt8} (" -->" ), data, i)[end ]
106
107
elseif c3 === ' ['
@@ -134,7 +135,6 @@ function next(o::TokenData)
134
135
return TokenData (tok, depth, next_depth, i, j - i, data)
135
136
end
136
137
137
-
138
138
function prev (o:: TokenData )
139
139
j = o. pos - 1
140
140
j < 1 && return nothing
@@ -359,6 +359,18 @@ function Node((; depth, nodetype, tag, attributes, value)::RowNode)
359
359
end
360
360
# Build a `Node` from a token: the token is first converted to a `RowNode`, which is
# then handed to the `Node` constructor that accepts a `RowNode`.
function Node(o::TokenData)
    row = RowNode(o)
    return Node(row)
end
361
361
362
"""
    ==(a::Node, b::Node)

Return `true` when `a` and `b` have equal `nodetype`, `tag`, `attributes` and `value`,
and their children are pairwise equal.

A `children` field of `nothing` is treated the same as an empty collection, so a
childless node compares equal regardless of which of the two representations it uses.
Nodes with a different number of children are never equal. Fields not listed above
(e.g. `depth`) do not take part in the comparison.
"""
function Base.:(==)(a::Node, b::Node)
    same_fields = a.nodetype == b.nodetype &&
        a.tag == b.tag &&
        a.attributes == b.attributes &&
        a.value == b.value
    same_fields || return false

    # Normalize "no children" to an empty tuple so that neither `length` nor `zip`
    # is ever called on `nothing` (which would throw a MethodError).
    kids_a = isnothing(a.children) ? () : a.children
    kids_b = isnothing(b.children) ? () : b.children

    # `zip` stops at the shorter collection, so the lengths must be compared
    # explicitly; otherwise a node would equal another that merely starts with the
    # same children.
    length(kids_a) == length(kids_b) || return false
    return all(ka == kb for (ka, kb) in zip(kids_a, kids_b))
end
# NOTE(review): no matching `Base.hash(::Node, ::UInt)` is visible in this view. If
# nodes are used as `Dict`/`Set` keys, a hash consistent with this `==` is needed —
# confirm against the rest of the file.
373
+
362
374
# function element(nodetype::NodeType, tag = nothing; attributes...)
363
375
# attributes = isempty(attributes) ?
364
376
# nothing :
@@ -372,7 +384,7 @@ Base.lastindex(o::Node) = lastindex(o.children)
372
384
373
385
# Append node `b` to the children of node `a`. The result of the underlying `push!`
# on `a.children` is returned unchanged.
function Base.push!(a::Node, b::Node)
    kids = a.children
    return push!(kids, b)
end
374
386
375
# AbstractTrees interface: the child nodes of `o`.
# A node whose `children` field is `nothing` reports an empty `Node`-typed vector
# (instead of an untyped `Any[]`), so callers can iterate the result unconditionally.
# Assumes `o.children` holds `Node`s, as `push!(a::Node, b::Node)` suggests — confirm
# against the `Node` definition.
AbstractTrees.children(o::Node) = isnothing(o.children) ? Node[] : o.children
376
388
377
389
# Display a `Node` on `io` by delegating to `_show_node`.
function Base.show(io::IO, o::Node)
    return _show_node(io, o)
end
378
390
@@ -443,255 +455,47 @@ end
443
455
# -----------------------------------------------------------------------------# write_xml
444
456
# Serialize `x` into an in-memory buffer using the IO-based `write` method and return
# the accumulated text as a `String`.
function write(x::Node)
    buf = IOBuffer()
    write(buf, x)
    return String(take!(buf))
end
445
457
458
# Write node `x` to the file named `filename`: the file is opened with `open` using
# the given mode string, and the IO-based `write(io, x)` method is applied to the
# resulting handle.
+ write (filename:: AbstractString , x:: Node ) = open (io -> write (io, x), filename, " w" )
459
+
446
460
# write(io, x; indent): serialize node `x` as XML text onto `io`.
#
# This span is a diff rendering: lines starting with `-` are the removed version,
# lines starting with `+` are the added version, and bare numbers are line counters
# from the diff view. In the added version:
#   * the output for a node starts with `padding`, i.e. `indent` repeated
#     `max(0, x.depth - 1)` times;
#   * a TEXT_NODE prints its `value` after passing it through `escape`;
#   * an ELEMENT_NODE prints its tag and attributes (via `_print_attrs`); when
#     `x.children` is `nothing` the tag is closed immediately, when the only child is
#     a TEXT_NODE that child and the closing tag follow on the same line, otherwise
#     each child is written recursively followed by a newline and then the closing tag
#     is printed after `padding`;
#   * DTD_NODE, DECLARATION_NODE, COMMENT_NODE and CDATA_NODE each print their own
#     delimiters around `x.value` (or the attributes, for the declaration);
#   * a DOCUMENT_NODE writes every child followed by a newline;
#   * any other `nodetype` raises through `error`.
function write (io:: IO , x:: Node ; indent = " " )
447
- print (io, indent ^ x. depth)
461
+ padding = indent ^ max (0 , x. depth - 1 )
462
+ print (io, padding)
448
463
if x. nodetype === TEXT_NODE
449
- print (io, x. value)
464
+ print (io, escape ( x. value) )
450
465
elseif x. nodetype === ELEMENT_NODE
451
466
print (io, ' <' , x. tag)
452
467
_print_attrs (io, x)
453
468
print (io, isnothing (x. children) ? ' /' : " " , ' >' )
454
- if ! isnothing (x. children)
469
+ single_text_child = ! isnothing (x. children) && length (x. children) == 1 && x. children[1 ]. nodetype === TEXT_NODE
470
+ if single_text_child
471
+ write (io, only (x. children); indent= " " )
472
+ print (io, " </" , x. tag, ' >' )
473
+ elseif ! isnothing (x. children)
455
474
println (io)
456
475
foreach (AbstractTrees. children (x)) do child
457
- write (io, child; indent= indent )
476
+ write (io, child; indent)
458
477
println (io)
459
478
end
460
- print (io, indent ^ x. depth)
461
- print (io, " </" , x. tag, ' >' )
479
+ print (io, padding, " </" , x. tag, ' >' )
480
+ end
481
+ elseif x. nodetype === DTD_NODE
482
+ print (io, " <!DOCTYPE" , x. value, ' >' )
483
+ elseif x. nodetype === DECLARATION_NODE
484
+ print (io, " <?xml" )
485
+ _print_attrs (io, x)
486
+ print (io, " ?>" )
487
+ elseif x. nodetype === COMMENT_NODE
488
+ print (io, " <!--" , x. value, " -->" )
489
+ elseif x. nodetype === CDATA_NODE
490
+ print (io, " <![CDATA[" , x. value, " ]]>" )
491
+ elseif x. nodetype === DOCUMENT_NODE
492
+ foreach (AbstractTrees. children (x)) do child
493
+ write (io, child; indent)
494
+ println (io)
462
495
end
463
496
else
464
- error (" unknown case" )
497
+ error (" Unreachable case reached during XML.write " )
465
498
end
466
499
end
467
500
468
-
469
-
470
- # # #-----------------------------------------------------------------------------# AbstractXMLNode
471
- # # abstract type AbstractXMLNode end
472
-
473
- # # Base.show(io::IO, ::MIME"text/plain", o::AbstractXMLNode) = showxml(io, o)
474
- # # Base.show(io::IO, ::MIME"text/xml", o::AbstractXMLNode) = showxml(io, o)
475
- # # Base.show(io::IO, ::MIME"application/xml", o::AbstractXMLNode) = showxml(io, o)
476
-
477
- # # Base.write(io::IO, node::AbstractXMLNode) = foreach(x -> showxml(io, x), children(node))
478
-
479
- # # function Base.:(==)(a::T, b::T) where {T <: AbstractXMLNode}
480
- # # all(getfield(a, f) == getfield(b, f) for f in fieldnames(T))
481
- # # end
482
-
483
- # # const INDENT = " "
484
-
485
- # # showxml(x; depth=0) = (io=IOBuffer(); showxml(io, x); print(String(take!(io))))
486
-
487
- # # # assumes '\n' occurs in String
488
- # # showxml(io::IO, x::String; depth=0) = print(io, INDENT^depth, x)
489
-
490
- # # printnode(io::IO, o::AbstractXMLNode) = showxml(io, o)
491
-
492
-
493
- # # #-----------------------------------------------------------------------------# DTD
494
- # # # TODO : all the messy details of DTD. For now, just dump everything into `text`
495
- # # struct DTD <: AbstractXMLNode
496
- # # text::String
497
- # # end
498
- # # showxml(io::IO, o::DTD; depth=0) = print(io, INDENT^depth, "<!DOCTYPE ", o.text, '>')
499
-
500
-
501
- # # #-----------------------------------------------------------------------------# Declaration
502
- # # mutable struct Declaration <: AbstractXMLNode
503
- # # tag::String
504
- # # attributes::OrderedDict{Symbol, String}
505
- # # end
506
- # # function showxml(io::IO, o::Declaration; depth=0)
507
- # # print(io, INDENT ^ depth, "<?", o.tag)
508
- # # print_attributes(io, o)
509
- # # print(io, "?>")
510
- # # end
511
- # # attributes(o::Declaration) = o.attributes
512
-
513
- # # #-----------------------------------------------------------------------------# CData
514
- # # mutable struct CData <: AbstractXMLNode
515
- # # text::String
516
- # # end
517
- # # showxml(io::IO, o::CData; depth=0) = printstyled(io, INDENT ^ depth, "<![CDATA[", o.text, "]]>", color=:light_black)
518
-
519
-
520
- # # #-----------------------------------------------------------------------------# Comment
521
- # # mutable struct Comment <: AbstractXMLNode
522
- # # text::String
523
- # # end
524
- # # showxml(io::IO, o::Comment; depth=0) = printstyled(io, INDENT ^ depth, "<!-- ", escape(o.text), " -->", color=:light_black)
525
-
526
- # # #-----------------------------------------------------------------------------# Element
527
- # # mutable struct Element <: AbstractXMLNode
528
- # # tag::String
529
- # # attributes::OrderedDict{Symbol, String}
530
- # # children::Vector{Union{CData, Comment, Element, String}}
531
- # # function Element(tag="UNDEF", attributes=OrderedDict{Symbol,String}(), children=Union{CData, Comment, Element, String}[])
532
- # # new(tag, attributes, children)
533
- # # end
534
- # # end
535
- # # function h(tag::String, children...; attrs...)
536
- # # attributes = OrderedDict{Symbol,String}(k => string(v) for (k,v) in pairs(attrs))
537
- # # Element(tag, attributes, collect(children))
538
- # # end
539
-
540
- # # function showxml(io::IO, o::Element; depth=0)
541
- # # print(io, INDENT ^ depth, '<')
542
- # # printstyled(io, tag(o), color=:light_cyan)
543
- # # print_attributes(io, o)
544
- # # n = length(children(o))
545
- # # if n == 0
546
- # # print(io, "/>")
547
- # # elseif n == 1 && children(o)[1] isa String
548
- # # s = children(o)[1]
549
- # # print(io, '>', s, "</")
550
- # # printstyled(io, tag(o), color=:light_cyan)
551
- # # print(io, '>')
552
- # # else
553
- # # print(io, '>')
554
- # # for child in children(o)
555
- # # println(io)
556
- # # showxml(io, child; depth=depth + 1)
557
- # # end
558
- # # print(io, '\n', INDENT^depth, "</")
559
- # # printstyled(io, tag(o), color=:light_cyan)
560
- # # print(io, '>')
561
- # # end
562
- # # end
563
-
564
- # # Base.show(io::IO, o::Element) = print_tree(io, o)
565
-
566
- # # function printnode(io::IO, o::Element, color=:light_cyan)
567
- # # print(io, '<')
568
- # # printstyled(io, tag(o), color=color)
569
- # # print_attributes(io, o)
570
- # # n = length(children(o))
571
- # # if n == 0
572
- # # print(io, "/>")
573
- # # else
574
- # # print(io, '>')
575
- # # printstyled(io, " (", length(children(o)), n > 1 ? " children)" : " child)", color=:light_black)
576
- # # end
577
- # # end
578
-
579
- # # function print_attributes(io::IO, o::AbstractXMLNode)
580
- # # foreach(pairs(attributes(o))) do (k,v)
581
- # # printstyled(io, ' ', k, '='; color=:green)
582
- # # printstyled(io, '"', v, '"'; color=:light_green)
583
- # # end
584
- # # end
585
-
586
- # # children(o::Element) = getfield(o, :children)
587
- # # tag(o::Element) = getfield(o, :tag)
588
- # # attributes(o::Element) = getfield(o, :attributes)
589
-
590
- # # Base.getindex(o::Element, i::Integer) = children(o)[i]
591
- # # Base.lastindex(o::Element) = lastindex(children(o))
592
- # # Base.setindex!(o::Element, val::Element, i::Integer) = setindex!(children(o), val, i)
593
- # # Base.push!(o::Element, val::Element) = push!(children(o), val)
594
-
595
- # # Base.getproperty(o::Element, x::Symbol) = attributes(o)[x]
596
- # # Base.setproperty!(o::Element, x::Symbol, val) = (attributes(o)[x] = string(val))
597
- # # Base.propertynames(o::Element) = collect(keys(attributes(o)))
598
-
599
- # # Base.get(o::Element, key::Symbol, val) = hasproperty(o, key) ? getproperty(o, key) : val
600
- # # Base.get!(o::Element, key::Symbol, val) = hasproperty(o, key) ? getproperty(o, key) : setproperty!(o, key, val)
601
-
602
-
603
-
604
-
605
- # # #-----------------------------------------------------------------------------# Document
606
- # # mutable struct Document <: AbstractXMLNode
607
- # # prolog::Vector{Union{Comment, Declaration, DTD}}
608
- # # root::Element
609
- # # Document(prolog=Union{Comment,Declaration,DTD}[], root=Element()) = new(prolog, root)
610
- # # end
611
-
612
- # # function Document(o::XMLTokenIterator)
613
- # # doc = Document()
614
- # # populate!(doc, o)
615
- # # return doc
616
- # # end
617
-
618
- # # Document(file::String) = open(io -> Document(XMLTokenIterator(io)), file, "r")
619
- # # Document(io::IO) = Document(XMLTokenIterator(io))
620
-
621
- # # Base.show(io::IO, ::MIME"text/plain", o::Document) = print_tree(io, o; maxdepth=1)
622
-
623
- # # printnode(io::IO, o::Document) = print(io, "XML.Document")
624
-
625
- # # children(o::Document) = (o.prolog..., o.root)
626
-
627
- # # showxml(io::IO, o::Document; depth=0) = foreach(x -> (showxml(io, x), println(io)), children(o))
628
-
629
- # # #-----------------------------------------------------------------------------# makers (AbstractXMLNode from a token)
630
- # # make_dtd(s) = DTD(replace(s, "<!doctype " => "", "<!DOCTYPE " => "", '>' => ""))
631
- # # make_declaration(s) = Declaration(get_tag(s), get_attributes(s))
632
- # # make_comment(s) = Comment(replace(s, "<!-- " => "", " -->" => ""))
633
- # # make_cdata(s) = CData(replace(s, "<![CDATA[" => "", "]]>" => ""))
634
- # # make_element(s) = Element(get_tag(s), get_attributes(s))
635
-
636
- # # get_tag(x) = @inbounds x[findfirst(r"[a-zA-z][^\s>/]*", x)] # Matches: (any letter) → (' ', '/', '>')
637
- # # # get_tag(x) = match(r"[a-zA-z][^\s>/]*", x).match # Matches: (any letter) → (' ', '/', '>')
638
-
639
- # # function get_attributes(x)
640
- # # out = OrderedDict{Symbol,String}()
641
- # # rng = findfirst(r"(?<=\s).*\"", x)
642
- # # isnothing(rng) && return out
643
- # # s = x[rng]
644
- # # kys = (m.match for m in eachmatch(r"[a-zA-Z][a-zA-Z\.-_]*(?=\=)", s))
645
- # # vals = (m.match for m in eachmatch(r"(?<=(\=\"))[^\"]*", s))
646
- # # foreach(zip(kys,vals)) do (k,v)
647
- # # out[Symbol(k)] = v
648
- # # end
649
- # # out
650
- # # end
651
-
652
-
653
-
654
- # # #-----------------------------------------------------------------------------# populate!
655
- # # function populate!(doc::Document, o::XMLTokenIterator)
656
- # # for (T, s) in o
657
- # # if T == DTDTOKEN
658
- # # push!(doc.prolog, make_dtd(s))
659
- # # elseif T == DECLARATIONTOKEN
660
- # # push!(doc.prolog, make_declaration(s))
661
- # # elseif T == COMMENTTOKEN
662
- # # push!(doc.prolog, make_comment(s))
663
- # # else # root node
664
- # # doc.root = Element(get_tag(s), get_attributes(s))
665
- # # add_children!(doc.root, o, "</$(tag(doc.root))>")
666
- # # end
667
- # # end
668
- # # end
669
-
670
- # # # until = closing tag e.g. `</Name>`
671
- # # function add_children!(e::Element, o::XMLTokenIterator, until::String)
672
- # # s = ""
673
- # # c = children(e)
674
- # # while s != until
675
- # # next = iterate(o, -1) # if state == 0, io will get reset to original position
676
- # # isnothing(next) && break
677
- # # T, s = next[1]
678
- # # if T == COMMENTTOKEN
679
- # # push!(c, make_comment(s))
680
- # # elseif T == CDATATOKEN
681
- # # push!(c, make_cdata(s))
682
- # # elseif T == ELEMENTSELFCLOSEDTOKEN
683
- # # push!(c, make_element(s))
684
- # # elseif T == ELEMENTTOKEN
685
- # # child = make_element(s)
686
- # # add_children!(child, o, "</$(tag(child))>")
687
- # # push!(c, child)
688
- # # elseif T == TEXTTOKEN
689
- # # push!(c, s)
690
- # # end
691
- # # end
692
- # # end
693
-
694
- # # #-----------------------------------------------------------------------------# Node
695
- # # include("node.jl")
696
-
697
501
end
0 commit comments