Commit 504eed3

Only run perf once instead of per sample
Running perf on every sample would probably slow down all benchmarks significantly for no gain, as we only store the last recorded perf result.
1 parent 2bcc4ee commit 504eed3
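
The idea in miniature: collect cheap timing samples in a loop and pay for the instrumented run exactly once, since only one perf result is kept anyway. Below is a self-contained sketch with hypothetical names (run_sketch and measure_once are illustrations, not BenchmarkTools internals; the real change is in the src/execution.jl diff further down):

    # Hypothetical sketch: sample in a loop, run the costly measurement once.
    function run_sketch(f; samples=100, measure_once=nothing)
        times = Float64[]
        for _ in 1:samples
            t0 = time_ns()
            f()
            push!(times, (time_ns() - t0) / 1e9)  # per-sample cost stays minimal
        end
        # Only one slot exists for the extra result, so running it per sample
        # would just overwrite it `samples` times.
        extra = measure_once === nothing ? nothing : measure_once(f)
        return times, extra
    end

    times, extra = run_sketch(() -> sum(rand(1000)))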

4 files changed: +71, -96 lines

src/execution.jl (57 additions & 56 deletions)

@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
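
The sample function now returns a plain tuple instead of the removed TrialContents struct: measurement fields first, the benchmarked expression's return value last. The order (time, gctime, memory, allocs, return_val) is taken from the generated return statement further down. A small illustration of the slicing convention, with made-up values:

    s = (2.0, 1.0, 4, 5, :val)  # (time, gctime, memory, allocs, return_val)
    s[1:(end - 1)]              # (2.0, 1.0, 4, 5), splatted into push!(trial, ...)
    s[end]                      # :val, the benchmarked expression's result
    first(s)                    # 2.0, the time estimate _lineartrial uses below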

@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
                         __evals,
                     ),
                 )
-                if $(params.experimental_enable_linux_perf)
-                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                    __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                        $(params.linux_perf_options.events),
-                        $(params.linux_perf_options.spaces),
+                return __time, __gctime, __memory, __allocs, __return_val
+            end
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    eval(__params.linux_perf_options.events),
+                    eval(__params.linux_perf_options.spaces),
+                )
+                __linux_perf_bench = nothing
+                try
+                    __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads=eval(__params.linux_perf_options.threads),
                     )
-                    __linux_perf_bench = nothing
-                    try
-                        __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                            __linux_perf_groups;
-                            threads=$(params.linux_perf_options.threads),
-                        )
-                    catch e
-                        if e isa ErrorException &&
-                            startswith(e.msg, "perf_event_open error : ")
-                            @warn "Perf is disabled"
-                        else
-                            rethrow()
-                        end
+                catch e
+                    if e isa ErrorException &&
+                        startswith(e.msg, "perf_event_open error : ")
+                        @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
+                        # Given we now more accurately determine if perf is available can we do away with this hack?
+                    else
+                        rethrow()
                     end
+                end
 
-                    if !isnothing(__linux_perf_bench)
-                        try
-                            $(setup)
-                            BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                            # We'll just run it one time.
-                            __return_val_2 = $(invocation)
-                            BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                            # trick the compiler not to eliminate the code
-                            if rand() < 0
-                                __linux_perf_stats = __return_val_2
-                            else
-                                __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                    __linux_perf_bench
-                                )
-                            end
-                        catch
-                            rethrow()
-                        finally
-                            close(__linux_perf_bench)
-                            $(teardown)
+                if !isnothing(__linux_perf_bench)
+                    $(setup)
+                    try
+                        $LinuxPerf.enable!(__linux_perf_bench)
+                        # We'll just run it one time.
+                        __return_val_2 = $(invocation)
+                        $LinuxPerf.disable!(__linux_perf_bench)
+                        # trick the compiler not to eliminate the code
+                        if rand() < 0
+                            __linux_perf_stats = __return_val_2
+                        else
+                            __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
                         end
+                        return __linux_perf_stats
+                    catch
+                        rethrow()
+                    finally
+                        close(__linux_perf_bench)
+                        $(teardown)
                     end
-                else
-                    __return_val_2 = nothing
-                    __linux_perf_stats = nothing
                 end
-                return BenchmarkTools.TrialContents(
-                    __time,
-                    __gctime,
-                    __memory,
-                    __allocs,
-                    __return_val,
-                    __return_val_2,
-                    __linux_perf_stats,
-                )
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
     )
 end
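
For context, here is how this path might be exercised from user code. The kwarg-forwarding form of run is my assumption (BenchmarkTools lets keyword arguments override Parameters fields); only the experimental_enable_linux_perf name and the linux_perf_stats field come from the diff itself:

    using BenchmarkTools

    b = @benchmarkable sum(rand(1000))
    tune!(b)
    # Assumed: the kwarg sets Parameters.experimental_enable_linux_perf,
    # which gates the single b.linux_perf_func call in _run above.
    t = run(b; experimental_enable_linux_perf=true)
    t.linux_perf_stats  # one LinuxPerf.Stats (or nothing), recorded once per run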

src/parameters.jl (1 addition & 1 deletion)

@@ -30,7 +30,7 @@ function perf_available()
     try
         opts = DEFAULT_LINUX_PERF_OPTIONS
         groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
-        bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
+        bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
         return true
     catch
         return false
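
The only change here is keyword-argument style; in Julia the comma and semicolon call forms are equivalent, as this standalone example shows:

    g(x; threads=1) = (x, threads)
    g(1, threads=2)   # comma form: still parsed as a keyword argument
    g(1; threads=2)   # semicolon form: same call, the style preferred here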

src/trials.jl (3 additions & 27 deletions)

@@ -11,16 +11,6 @@ mutable struct Trial
     linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
 end
 
-struct TrialContents{A,B}
-    time::Float64
-    gctime::Float64
-    memory::Int
-    allocs::Int
-    return_val::A
-    return_val_2::B
-    linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
-end
-
 function Trial(params::Parameters)
     return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
 end
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
     )
 end
 
-function Base.push!(t::Trial, trial_contents::TrialContents)
-    time = trial_contents.time
-    gctime = trial_contents.gctime
-    memory = trial_contents.memory
-    allocs = trial_contents.allocs
+function Base.push!(t::Trial, time, gctime, memory, allocs)
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
-    t.linux_perf_stats = trial_contents.linux_perf_stats
     return t
 end
 
@@ -65,17 +50,8 @@ end
 
 Base.length(t::Trial) = length(t.times)
 function Base.getindex(t::Trial, i::Number)
-    return push!(
-        Trial(t.params),
-        TrialContents(
-            t.times[i],
-            t.gctimes[i],
-            t.memory,
-            t.allocs,
-            nothing,
-            nothing,
-            t.linux_perf_stats,
-        ),
+    return Trial(
+        t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
     )
 end
 function Base.getindex(t::Trial, i)
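
To see the slimmed-down Trial API end to end (made-up numbers, mirroring the test changes below):

    using BenchmarkTools

    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    push!(t, 2.0, 1.0, 4, 5)      # positional: time, gctime, memory, allocs
    push!(t, 21.0, 0.0, 41, 51)   # memory/allocs track the per-trial minimum
    (t.memory, t.allocs)          # (4, 5)
    t[2]                          # one-sample Trial: times == [21.0], memory == 4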

test/TrialsTests.jl (10 additions & 12 deletions)

@@ -1,21 +1,21 @@
 module TrialsTests
 
 using BenchmarkTools
-using BenchmarkTools: TrialContents
 using Test
 
 #########
 # Trial #
 #########
+
 trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
-push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
-push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial1, 2, 1, 4, 5)
+push!(trial1, 21, 0, 41, 51)
 
 trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
-push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
+push!(trial2, 2, 1, 4, 5)
 
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
 @test length(trial2) == 3
 deleteat!(trial2, 3)
 @test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params
 
 @test trial1 == trial2
 
-@test trial1[2] == push!(
-    BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
-    TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
-)
+@test trial1[2] ==
+    push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
 @test trial1[1:end] == trial1
 
 @test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
 randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
 
 for _ in 1:40
-    push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(1:20), 1, 1, 1)
 end
 
 while mean(randtrial) <= median(randtrial)
-    push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(10:20), 1, 1, 1)
 end
 
 rmskew!(randtrial)
