Skip to content

Commit 0c3faac

Browse files
committed
benchmarks and readme
1 parent 6d372b3 commit 0c3faac

File tree

4 files changed

+112
-34
lines changed

4 files changed

+112
-34
lines changed

README.md

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,25 @@ XML.write(node) # String
133133
- The fairest comparison is between `XML.Node` and `XMLDict.xml_dict`.
134134
- See the `benchmarks/suite.jl` file. The times reported below are medians in milliseconds.
135135

136+
137+
### Reading an XML File
138+
139+
```
140+
XML.LazyNode 0.012084
141+
XML.Node ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 888.367
142+
EzXML.readxml ■■■■■■ 200.009
143+
XMLDict.xml_dict ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1350.63
144+
```
145+
146+
### Lazily Iterating over Each Node
147+
```
148+
LazyNode ■■■■■■■■■■■■■■■■ 55.1
149+
EzXML.StreamReader ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 142.515
150+
```
151+
152+
### Collecting All Names/Tags in an XML File
136153
```
137-
XML.Raw 0.010209
138-
XML.LazyNode 0.010333
139-
collect(XML.LazyNode) ■■ 75.811
140-
XML.Node ■■■■■■■■■■■■■■■■■■■■■■■■■■ 996.321
141-
EzXML.readxml ■■■■■ 198.103
142-
XMLDict.xml_dict ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1207.79
143-
XML.LazyNode iteration ■ 55.5357
144-
EzXML.StreamReader ■■■■ 141.868
154+
XML.LazyNode ■■■■■■■■■■■■■■■■■■■■■■■■■■ 152.298
155+
EzXML.StreamReader ■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 165.21
156+
EzXML.readxml ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 239.197
145157
```

benchmarks/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[deps]
22
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
33
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
4+
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
45
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
56
XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
67
XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"

benchmarks/suite.jl

Lines changed: 87 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,113 @@
1-
using XML: XML
1+
using Pkg
2+
Pkg.activate(@__DIR__)
3+
4+
using XML
25
using EzXML: EzXML
36
using XMLDict: XMLDict
47
using BenchmarkTools
58
using DataFrames
69
using UnicodePlots
10+
using OrderedCollections: OrderedDict
11+
12+
13+
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 10
14+
BenchmarkTools.DEFAULT_PARAMETERS.samples = 20000
715

816

917
# nasa.xml was downloaded from:
1018
# http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/www/repository.html#nasa
1119
file = joinpath(@__DIR__, "nasa.xml")
1220

21+
df = DataFrame(kind=String[], name=String[], bench=BenchmarkTools.Trial[])
22+
23+
#-----------------------------------------------------------------------------# Read
24+
kind = "Read"
25+
26+
# name = "XML.Raw"
27+
# @info name
28+
# bench = @benchmark read($file, XML.Raw)
29+
# push!(df, (;kind, name, bench))
30+
31+
32+
name = "XML.LazyNode"
33+
@info name
34+
bench = @benchmark read($file, LazyNode)
35+
push!(df, (;kind, name, bench))
36+
37+
name = "XML.Node"
38+
@info name
39+
bench = @benchmark read($file, Node)
40+
push!(df, (;kind, name, bench))
1341

1442

15-
#-----------------------------------------------------------------------------# benchmarks
16-
benchmarks = []
43+
name = "EzXML.readxml"
44+
@info name
45+
bench = @benchmark EzXML.readxml($file)
46+
push!(df, (;kind, name, bench))
1747

18-
@info "XML.Raw"
19-
push!(benchmarks, "XML.Raw" => @benchmark(read($file, XML.Raw)))
2048

21-
@info "XML.LazyNode"
22-
push!(benchmarks, "XML.LazyNode" => @benchmark(read($file, LazyNode)))
49+
name = "XMLDict.xml_dict"
50+
@info name
51+
bench = @benchmark XMLDict.xml_dict(read($file, String))
52+
push!(df, (;kind, name, bench))
2353

24-
@info "collect(LazyNode)"
25-
push!(benchmarks, "collect(XML.LazyNode)" => @benchmark(collect(read($file, LazyNode))))
2654

27-
@info "XML.Node"
28-
push!(benchmarks, "XML.Node" => @benchmark(read($file, Node)))
55+
#-----------------------------------------------------------------------------# Lazy Iteration
56+
kind = "Lazy Iteration"
2957

30-
@info "EzXML"
31-
push!(benchmarks, "EzXML.readxml" => @benchmark(EzXML.readxml($file)))
58+
name = "for x in read(file, LazyNode); end"
59+
@info name
60+
bench = @benchmark (for x in read($file, LazyNode); end)
61+
push!(df, (;kind, name, bench))
3262

33-
@info "XMLDict"
34-
push!(benchmarks, "XMLDict.xml_dict" => @benchmark(XMLDict.xml_dict(read($file, String))))
3563

36-
@info "LazyNode iteration"
37-
push!(benchmarks, "XML.LazyNode iteration" => @benchmark((for x in read($file, LazyNode); end)))
64+
name = "for x in open(EzXML.StreamReader, file); end"
65+
@info name
66+
bench = @benchmark (reader = open(EzXML.StreamReader, $file); for x in reader; end; close(reader))
67+
push!(df, (;kind, name, bench))
3868

39-
@info "EzXML.StreamReader iteration"
40-
push!(benchmarks, "EzXML.StreamReader" => @benchmark((reader = open(EzXML.StreamReader, $file); for x in reader; end; close(reader))))
4169

42-
#-----------------------------------------------------------------------------# make DataFrame
43-
df = DataFrame()
70+
#-----------------------------------------------------------------------------# Lazy Iteration: Collect Tags
71+
kind = "Collect Tags"
4472

45-
for (name, bench) in benchmarks
46-
push!(df, (; name, bench))
73+
name = "via XML.LazyNode"
74+
@info name
75+
bench = @benchmark [tag(x) for x in o] setup=(o = read(file, LazyNode))
76+
push!(df, (;kind, name, bench))
77+
78+
79+
name = "via EzXML.StreamReader"
80+
@info name
81+
bench = @benchmark [r.name for x in r if x == EzXML.READER_ELEMENT] setup=(r=open(EzXML.StreamReader, file)) teardown=(close(r))
82+
push!(df, (;kind, name, bench))
83+
84+
85+
name = "via EzXML.readxml"
86+
@info name
87+
# Collect the names of all element nodes below `o` into a `Vector{String}`.
# Each element child's name is pushed first, followed by the names found by
# recursing into that child (depth-first, pre-order).
# NOTE(review): every recursive call allocates its own vector that is then
# copied into the parent's; this cost is included in the "via EzXML.readxml"
# benchmark timing.
function get_tags(o::EzXML.Node)
88+
out = String[]
89+
for node in EzXML.eachelement(o)
90+
push!(out, node.name)
91+
# Append the tags gathered from this child's own subtree.
for tag in get_tags(node)
92+
push!(out, tag)
93+
end
94+
end
95+
out
4796
end
97+
bench = @benchmark get_tags(o.root) setup=(o = EzXML.readxml(file))
98+
push!(df, (;kind, name, bench))
4899

49-
df
50100

51-
barplot(df.name, map(x -> median(x).time / 1000^2, df.bench), title="Median Benchmark Time (s)", border=:none)
101+
102+
#-----------------------------------------------------------------------------# Plots
103+
function plot(df, kind)
104+
g = groupby(df, :kind)
105+
sub = g[(;kind)]
106+
x = map(row -> "$(row.kind): $(row.name)", eachrow(sub))
107+
y = map(x -> median(x).time / 1000^2, sub.bench)
108+
display(barplot(x, y, title = "$kind Time (ms)", border=:none, width=50))
109+
end
110+
111+
plot(df, "Read")
112+
plot(df, "Lazy Iteration")
113+
plot(df, "Collect Tags")

src/raw.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,10 @@ struct Raw
6767
end
6868
Raw(data::Vector{UInt8}) = Raw(RawDocument, 0, 0, 0, data)
6969

70-
Base.read(filename::String, ::Type{Raw}) = Raw(Mmap.mmap(filename))
70+
# Build a `Raw` from the memory-mapped contents of `filename`.
# If `isfile(filename)` is false, `error` is called with a message naming the
# path, rather than passing the path on to `Mmap.mmap`.
Base.read(filename::String, ::Type{Raw}) = isfile(filename) ?
71+
Raw(Mmap.mmap(filename)) :
72+
error("File \"$filename\" does not exist.")
73+
7174
Base.read(io::IO, ::Type{Raw}) = Raw(read(io))
7275

7376
parse(x::AbstractString, ::Type{Raw}) = Raw(Vector{UInt8}(x))

0 commit comments

Comments
 (0)