Commit 45048b9
Only run perf once instead of per sample
Running perf every sample would probably slow down all benchmarks significantly for no gain (as we only store the last recorded perf result).
1 parent fc4a385 commit 45048b9
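
For context, the shape of the change: previously the generated sample function gathered LinuxPerf stats on every sample even though the Trial kept only the last result; now timing samples and perf profiling are decoupled, and perf wraps a single extra invocation after the timing loop. A minimal sketch of the new control flow in `_run`, simplified from the diff below (`samplefunc` and `linux_perf_func` are the generated functions):

    while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
        # timing-only sample: (time, gctime, memory, allocs, return_val)
        s = b.samplefunc(b.quote_vals, params)
        push!(trial, s[1:(end - 1)]...)
        iters += 1
    end
    if p.experimental_enable_linux_perf
        # perf now runs once, around a single extra invocation of the benchmark body
        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
    end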

File tree (4 files changed: +71 −96 lines)

  src/execution.jl
  src/parameters.jl
  src/trials.jl
  test/TrialsTests.jl

src/execution.jl (57 additions, 56 deletions)

@@ -16,6 +16,7 @@ end
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -113,15 +114,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
 
@@ -185,7 +192,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -513,6 +520,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -579,64 +587,57 @@ function generate_benchmark_definition(
                         __evals,
                     ),
                 )
-                if $(params.experimental_enable_linux_perf)
-                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                    __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                        $(params.linux_perf_options.events),
-                        $(params.linux_perf_options.spaces),
+                return __time, __gctime, __memory, __allocs, __return_val
+            end
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    eval(__params.linux_perf_options.events),
+                    eval(__params.linux_perf_options.spaces),
+                )
+                __linux_perf_bench = nothing
+                try
+                    __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads=eval(__params.linux_perf_options.threads),
                     )
-                    __linux_perf_bench = nothing
-                    try
-                        __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                            __linux_perf_groups;
-                            threads=$(params.linux_perf_options.threads),
-                        )
-                    catch e
-                        if e isa ErrorException &&
-                            startswith(e.msg, "perf_event_open error : ")
-                            @warn "Perf is disabled"
-                        else
-                            rethrow()
-                        end
+                catch e
+                    if e isa ErrorException &&
+                        startswith(e.msg, "perf_event_open error : ")
+                        @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
+                        # Given we now more accurately determine if perf is available can we do away with this hack?
+                    else
+                        rethrow()
                     end
+                end
 
-                    if !isnothing(__linux_perf_bench)
-                        try
-                            $(setup)
-                            BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                            # We'll just run it one time.
-                            __return_val_2 = $(invocation)
-                            BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                            # trick the compiler not to eliminate the code
-                            if rand() < 0
-                                __linux_perf_stats = __return_val_2
-                            else
-                                __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                    __linux_perf_bench
-                                )
-                            end
-                        catch
-                            rethrow()
-                        finally
-                            close(__linux_perf_bench)
-                            $(teardown)
+                if !isnothing(__linux_perf_bench)
+                    $(setup)
+                    try
+                        $LinuxPerf.enable!(__linux_perf_bench)
+                        # We'll just run it one time.
+                        __return_val_2 = $(invocation)
+                        $LinuxPerf.disable!(__linux_perf_bench)
+                        # trick the compiler not to eliminate the code
+                        if rand() < 0
+                            __linux_perf_stats = __return_val_2
+                        else
+                            __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
                         end
+                        return __linux_perf_stats
+                    catch
+                        rethrow()
+                    finally
+                        close(__linux_perf_bench)
+                        $(teardown)
                     end
-                else
-                    __return_val_2 = nothing
-                    __linux_perf_stats = nothing
                 end
-                return BenchmarkTools.TrialContents(
-                    __time,
-                    __gctime,
-                    __memory,
-                    __allocs,
-                    __return_val,
-                    __return_val_2,
-                    __linux_perf_stats,
-                )
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
    )
 end
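
To make the `s[1:(end - 1)]` and `first(...)` indexing above concrete: the generated sample function now returns a plain tuple `(__time, __gctime, __memory, __allocs, __return_val)` instead of a `TrialContents` struct. A hedged sketch with made-up values:

    s = (12.5, 0.0, 128, 2, "result")  # shape returned by the generated samplefunc
    push!(trial, s[1:(end - 1)]...)    # records time, gctime, memory, allocs
    return_val = s[end]                # the benchmark body's return value
    first(s)                           # == 12.5, the time estimate _lineartrial reads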

src/parameters.jl (1 addition, 1 deletion)

@@ -30,7 +30,7 @@ function perf_available()
     try
         opts = DEFAULT_LINUX_PERF_OPTIONS
         groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
-        bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
+        bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
         return true
     catch
         return false

src/trials.jl (3 additions, 27 deletions)

@@ -11,16 +11,6 @@ mutable struct Trial
     linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
 end
 
-struct TrialContents{A,B}
-    time::Float64
-    gctime::Float64
-    memory::Int
-    allocs::Int
-    return_val::A
-    return_val_2::B
-    linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
-end
-
 function Trial(params::Parameters)
     return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
 end
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
     )
 end
 
-function Base.push!(t::Trial, trial_contents::TrialContents)
-    time = trial_contents.time
-    gctime = trial_contents.gctime
-    memory = trial_contents.memory
-    allocs = trial_contents.allocs
+function Base.push!(t::Trial, time, gctime, memory, allocs)
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
-    t.linux_perf_stats = trial_contents.linux_perf_stats
     return t
 end
 
@@ -65,17 +50,8 @@ end
 
 Base.length(t::Trial) = length(t.times)
 function Base.getindex(t::Trial, i::Number)
-    return push!(
-        Trial(t.params),
-        TrialContents(
-            t.times[i],
-            t.gctimes[i],
-            t.memory,
-            t.allocs,
-            nothing,
-            nothing,
-            t.linux_perf_stats,
-        ),
+    return Trial(
+        t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
     )
 end
 function Base.getindex(t::Trial, i)
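
The slimmed-down `push!` and `getindex` can be exercised directly; a minimal sketch using only what this diff defines (values are made up):

    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    push!(t, 2.0, 1.0, 4, 5)      # time, gctime, memory, allocs
    push!(t, 21.0, 0.0, 41, 51)
    t.memory                      # == 4: push! keeps the minimum memory/allocs seen
    t[2]                          # one-sample Trial with times == [21.0]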

test/TrialsTests.jl (10 additions, 12 deletions)

@@ -1,21 +1,21 @@
 module TrialsTests
 
 using BenchmarkTools
-using BenchmarkTools: TrialContents
 using Test
 
 #########
 # Trial #
 #########
+
 trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
-push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
-push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial1, 2, 1, 4, 5)
+push!(trial1, 21, 0, 41, 51)
 
 trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
-push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
+push!(trial2, 2, 1, 4, 5)
 
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
 @test length(trial2) == 3
 deleteat!(trial2, 3)
 @test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params
 
 @test trial1 == trial2
 
-@test trial1[2] == push!(
-    BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
-    TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
-)
+@test trial1[2] ==
+    push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
 @test trial1[1:end] == trial1
 
 @test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
 randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
 
 for _ in 1:40
-    push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(1:20), 1, 1, 1)
 end
 
 while mean(randtrial) <= median(randtrial)
-    push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(10:20), 1, 1, 1)
 end
 
 rmskew!(randtrial)
