|
1 |
| -using XML: XML |
| 1 | +using Pkg |
| 2 | +Pkg.activate(@__DIR__) |
| 3 | + |
| 4 | +using XML |
2 | 5 | using EzXML: EzXML
|
3 | 6 | using XMLDict: XMLDict
|
4 | 7 | using BenchmarkTools
|
5 | 8 | using DataFrames
|
6 | 9 | using UnicodePlots
|
| 10 | +using OrderedCollections: OrderedDict |
| 11 | + |
| 12 | + |
# Package-wide BenchmarkTools defaults used by every `@benchmark` call below.
let params = BenchmarkTools.DEFAULT_PARAMETERS
    params.seconds = 10
    params.samples = 20000
end


# nasa.xml was downloaded from:
# http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/www/repository.html#nasa
file = joinpath(@__DIR__, "nasa.xml")

# One row per benchmark: its section (`kind`), display label (`name`) and trial.
df = DataFrame(; kind = String[], name = String[], bench = BenchmarkTools.Trial[])
| 22 | + |
| 23 | +#-----------------------------------------------------------------------------# Read |
# Section label stored in the `kind` column of every row pushed below.
kind = "Read"

# Disabled benchmark, kept for reference:
# name = "XML.Raw"
# @info name
# bench = @benchmark read($file, XML.Raw)
# push!(df, (;kind, name, bench))


# Time `read` of the file into a `LazyNode`.
name = "XML.LazyNode"
@info name
bench = @benchmark(read($file, LazyNode))
push!(df, (kind = kind, name = name, bench = bench))

# Time `read` of the file into a `Node`.
name = "XML.Node"
@info name
bench = @benchmark(read($file, Node))
push!(df, (kind = kind, name = name, bench = bench))
13 | 41 |
|
14 | 42 |
|
15 |
| -#-----------------------------------------------------------------------------# benchmarks |
16 |
| -benchmarks = [] |
# Time `EzXML.readxml` on the same file.
name = "EzXML.readxml"
@info name
bench = @benchmark(EzXML.readxml($file))
push!(df, (kind = kind, name = name, bench = bench))
17 | 47 |
|
18 |
| -@info "XML.Raw" |
19 |
| -push!(benchmarks, "XML.Raw" => @benchmark(read($file, XML.Raw))) |
20 | 48 |
|
21 |
| -@info "XML.LazyNode" |
22 |
| -push!(benchmarks, "XML.LazyNode" => @benchmark(read($file, LazyNode))) |
# Time `XMLDict.xml_dict`; the file is read into a String inside the timed expression.
name = "XMLDict.xml_dict"
@info name
bench = @benchmark(XMLDict.xml_dict(read($file, String)))
push!(df, (kind = kind, name = name, bench = bench))
23 | 53 |
|
24 |
| -@info "collect(LazyNode)" |
25 |
| -push!(benchmarks, "collect(XML.LazyNode)" => @benchmark(collect(read($file, LazyNode)))) |
26 | 54 |
|
27 |
| -@info "XML.Node" |
28 |
| -push!(benchmarks, "XML.Node" => @benchmark(read($file, Node))) |
| 55 | +#-----------------------------------------------------------------------------# Lazy Iteration |
| 56 | +kind = "Lazy Iteration" |
29 | 57 |
|
30 |
| -@info "EzXML" |
31 |
| -push!(benchmarks, "EzXML.readxml" => @benchmark(EzXML.readxml($file))) |
# Time reading the file as a `LazyNode` and iterating it with an empty loop body.
name = "for x in read(file, LazyNode); end"
@info name
bench = @benchmark begin
    for x in read($file, LazyNode)
    end
end
push!(df, (kind = kind, name = name, bench = bench))
32 | 62 |
|
33 |
| -@info "XMLDict" |
34 |
| -push!(benchmarks, "XMLDict.xml_dict" => @benchmark(XMLDict.xml_dict(read($file, String)))) |
35 | 63 |
|
36 |
| -@info "LazyNode iteration" |
37 |
| -push!(benchmarks, "XML.LazyNode iteration" => @benchmark((for x in read($file, LazyNode); end))) |
# Time opening an `EzXML.StreamReader`, iterating it with an empty loop body,
# and closing it again.
name = "for x in open(EzXML.StreamReader, file); end"
@info name
bench = @benchmark begin
    reader = open(EzXML.StreamReader, $file)
    for x in reader
    end
    close(reader)
end
push!(df, (kind = kind, name = name, bench = bench))
38 | 68 |
|
39 |
| -@info "EzXML.StreamReader iteration" |
40 |
| -push!(benchmarks, "EzXML.StreamReader" => @benchmark((reader = open(EzXML.StreamReader, $file); for x in reader; end; close(reader)))) |
41 | 69 |
|
42 |
| -#-----------------------------------------------------------------------------# make DataFrame |
43 |
| -df = DataFrame() |
| 70 | +#-----------------------------------------------------------------------------# Lazy Iteration: Collect Tags |
| 71 | +kind = "Collect Tags" |
44 | 72 |
|
45 |
| -for (name, bench) in benchmarks |
46 |
| - push!(df, (; name, bench)) |
# Collect every tag by iterating a `LazyNode`; the node is read in `setup`, so
# only the comprehension is timed.
name = "via XML.LazyNode"
@info name
bench = @benchmark [tag(x) for x in o] setup=(o = read(file, LazyNode))
push!(df, (; kind, name, bench))


# Collect every element name from an `EzXML.StreamReader`.
# The comprehension consumes the reader, and BenchmarkTools runs `setup` once
# per sample rather than once per evaluation. `evals=1` guarantees that every
# timed evaluation starts from a freshly opened reader instead of an exhausted one.
name = "via EzXML.StreamReader"
@info name
bench = @benchmark [r.name for x in r if x == EzXML.READER_ELEMENT] setup=(r=open(EzXML.StreamReader, file)) teardown=(close(r)) evals=1
push!(df, (; kind, name, bench))
| 83 | + |
| 84 | + |
name = "via EzXML.readxml"
@info name

# Recursive worker for `get_tags`: appends the name of every element below `o`
# to `out`, each element before its own descendants, and returns `out`.
function _get_tags!(out::Vector{String}, o::EzXML.Node)
    for node in EzXML.eachelement(o)
        push!(out, node.name)
        _get_tags!(out, node)
    end
    return out
end

"""
    get_tags(o::EzXML.Node) -> Vector{String}

Return the names of all element descendants of `o`, in the order
`EzXML.eachelement` visits them, with each element listed before its own
descendants. The name of `o` itself is not included.

A single output vector is shared by the whole traversal, so no intermediate
vector is allocated and copied for every element.
"""
get_tags(o::EzXML.Node) = _get_tags!(String[], o)

# The document is parsed in `setup`, so only the tag traversal is timed.
bench = @benchmark get_tags(o.root) setup=(o = EzXML.readxml(file))
push!(df, (; kind, name, bench))
48 | 99 |
|
49 |
| -df |
50 | 100 |
|
51 |
| -barplot(df.name, map(x -> median(x).time / 1000^2, df.bench), title="Median Benchmark Time (s)", border=:none) |
| 101 | + |
| 102 | +#-----------------------------------------------------------------------------# Plots |
"""
    plot(df, kind)

Display a bar plot of the median time of every benchmark in `df` whose `kind`
column equals `kind`. Each bar is labelled `"<kind>: <name>"`; the median
`time` of each trial is divided by `1000^2` and titled as milliseconds.
"""
function plot(df, kind)
    selected = groupby(df, :kind)[(; kind)]
    labels = ["$(r.kind): $(r.name)" for r in eachrow(selected)]
    millis = [median(trial).time / 1000^2 for trial in selected.bench]
    chart = barplot(labels, millis; title = "$kind Time (ms)", border = :none, width = 50)
    display(chart)
end
| 110 | + |
# Draw one chart per benchmark section, in the order the sections were recorded.
for section in ("Read", "Lazy Iteration", "Collect Tags")
    plot(df, section)
end
0 commit comments