
Commit d1a695e

Only run perf once instead of per sample
Running perf every sample would probably slow down all benchmarks significantly for no gain (as we only store the last recorded perf result).
1 parent 8d6413f commit d1a695e
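
In effect, the sampling loop now records plain timing tuples, and perf is invoked at most once per trial, after all timing samples are done. A rough sketch of the new control flow, condensed from the _run diff in src/execution.jl below (the sampling loop itself is elided):

    # samplefunc returns (time, gctime, memory, allocs, return_val)
    s = b.samplefunc(b.quote_vals, params)
    push!(trial, s[1:(end - 1)]...)   # store everything except the return value
    return_val = s[end]
    # ... remaining samples use the same push! pattern ...
    if p.experimental_enable_linux_perf
        params.gcsample && gcscrub()
        # a single perf-instrumented run per trial, stored on the Trial itself
        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
    end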

File tree

4 files changed: +71 -96 lines changed


src/execution.jl

Lines changed: 57 additions & 56 deletions
@@ -16,6 +16,7 @@ end
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -116,15 +117,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
         b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
     end
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
 
@@ -190,7 +197,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -520,6 +527,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
    type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
    signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
    signature_def = Expr(
@@ -586,64 +594,57 @@ function generate_benchmark_definition(
                         __evals,
                     ),
                 )
-                if $(params.experimental_enable_linux_perf)
-                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                    __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                        $(params.linux_perf_options.events),
-                        $(params.linux_perf_options.spaces),
+                return __time, __gctime, __memory, __allocs, __return_val
+            end
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    eval(__params.linux_perf_options.events),
+                    eval(__params.linux_perf_options.spaces),
+                )
+                __linux_perf_bench = nothing
+                try
+                    __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads=eval(__params.linux_perf_options.threads),
                     )
-                    __linux_perf_bench = nothing
-                    try
-                        __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                            __linux_perf_groups;
-                            threads=$(params.linux_perf_options.threads),
-                        )
-                    catch e
-                        if e isa ErrorException &&
-                            startswith(e.msg, "perf_event_open error : ")
-                            @warn "Perf is disabled"
-                        else
-                            rethrow()
-                        end
+                catch e
+                    if e isa ErrorException &&
+                        startswith(e.msg, "perf_event_open error : ")
+                        @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
+                        # Given we now more accurately determine if perf is available can we do away with this hack?
+                    else
+                        rethrow()
                     end
+                end
 
-                    if !isnothing(__linux_perf_bench)
-                        try
-                            $(setup)
-                            BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                            # We'll just run it one time.
-                            __return_val_2 = $(invocation)
-                            BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                            # trick the compiler not to eliminate the code
-                            if rand() < 0
-                                __linux_perf_stats = __return_val_2
-                            else
-                                __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                    __linux_perf_bench
-                                )
-                            end
-                        catch
-                            rethrow()
-                        finally
-                            close(__linux_perf_bench)
-                            $(teardown)
+                if !isnothing(__linux_perf_bench)
+                    $(setup)
+                    try
+                        $LinuxPerf.enable!(__linux_perf_bench)
+                        # We'll just run it one time.
+                        __return_val_2 = $(invocation)
+                        $LinuxPerf.disable!(__linux_perf_bench)
+                        # trick the compiler not to eliminate the code
+                        if rand() < 0
+                            __linux_perf_stats = __return_val_2
+                        else
+                            __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
                         end
+                        return __linux_perf_stats
+                    catch
+                        rethrow()
+                    finally
+                        close(__linux_perf_bench)
+                        $(teardown)
                     end
-                else
-                    __return_val_2 = nothing
-                    __linux_perf_stats = nothing
                 end
-                return BenchmarkTools.TrialContents(
-                    __time,
-                    __gctime,
-                    __memory,
-                    __allocs,
-                    __return_val,
-                    __return_val_2,
-                    __linux_perf_stats,
-                )
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
     )
 end
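
For illustration only, a usage sketch assuming the experimental flag behaves like any other Parameters field (as p.experimental_enable_linux_perf above suggests); the Trial object carries the single perf result:

    using BenchmarkTools
    b = @benchmarkable sum($(rand(1000)))
    t = run(b)           # timing samples only; perf never runs per sample
    t.linux_perf_stats   # nothing by default; populated once per trial when the
                         # experimental Linux perf option is enabled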

src/parameters.jl

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ function perf_available()
     try
         opts = DEFAULT_LINUX_PERF_OPTIONS
         groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
-        bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
+        bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
         return true
     catch
         return false
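
Worth noting: this fix is stylistic rather than behavioral. In a Julia call, an assignment after a comma is already parsed as a keyword argument, so both spellings pass threads the same way; the semicolon only makes the keyword section explicit. A throwaway example:

    g(x; threads=1) = threads   # toy function, defined only to show the call syntax
    g(0, threads=4)             # returns 4; comma form, still a keyword argument
    g(0; threads=4)             # returns 4; semicolon form, equivalent and explicit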

src/trials.jl

Lines changed: 3 additions & 27 deletions
@@ -11,16 +11,6 @@ mutable struct Trial
     linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
 end
 
-struct TrialContents{A,B}
-    time::Float64
-    gctime::Float64
-    memory::Int
-    allocs::Int
-    return_val::A
-    return_val_2::B
-    linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
-end
-
 function Trial(params::Parameters)
     return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
 end
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
     )
 end
 
-function Base.push!(t::Trial, trial_contents::TrialContents)
-    time = trial_contents.time
-    gctime = trial_contents.gctime
-    memory = trial_contents.memory
-    allocs = trial_contents.allocs
+function Base.push!(t::Trial, time, gctime, memory, allocs)
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
-    t.linux_perf_stats = trial_contents.linux_perf_stats
     return t
 end
 
@@ -65,17 +50,8 @@ end
 
 Base.length(t::Trial) = length(t.times)
 function Base.getindex(t::Trial, i::Number)
-    return push!(
-        Trial(t.params),
-        TrialContents(
-            t.times[i],
-            t.gctimes[i],
-            t.memory,
-            t.allocs,
-            nothing,
-            nothing,
-            t.linux_perf_stats,
-        ),
+    return Trial(
+        t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
     )
 end
 function Base.getindex(t::Trial, i)
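
A quick sketch of the simplified Trial API after this change (argument order matches the new Base.push! method above; the values are arbitrary):

    using BenchmarkTools
    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    push!(t, 2.0, 1.0, 4, 5)      # time, gctime, memory, allocs
    push!(t, 21.0, 0.0, 41, 51)   # memory and allocs keep the per-trial minimum
    t[1]                          # a one-sample Trial reusing memory, allocs, and perf stats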

test/TrialsTests.jl

Lines changed: 10 additions & 12 deletions
@@ -1,21 +1,21 @@
 module TrialsTests
 
 using BenchmarkTools
-using BenchmarkTools: TrialContents
 using Test
 
 #########
 # Trial #
 #########
+
 trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
-push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
-push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial1, 2, 1, 4, 5)
+push!(trial1, 21, 0, 41, 51)
 
 trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
-push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
+push!(trial2, 2, 1, 4, 5)
 
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
 @test length(trial2) == 3
 deleteat!(trial2, 3)
 @test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params
 
 @test trial1 == trial2
 
-@test trial1[2] == push!(
-    BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
-    TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
-)
+@test trial1[2] ==
+    push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
 @test trial1[1:end] == trial1
 
 @test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
 randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
 
 for _ in 1:40
-    push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(1:20), 1, 1, 1)
 end
 
 while mean(randtrial) <= median(randtrial)
-    push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(10:20), 1, 1, 1)
 end
 
 rmskew!(randtrial)
