Commit 504eed3

Only run perf once instead of per sample
Running perf on every sample would probably slow down all benchmarks significantly for no gain, as we only store the last recorded perf result.
1 parent 2bcc4ee commit 504eed3
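
The idea in miniature: collect cheap timing samples in a loop and pay for the instrumented run exactly once, since only one perf result is kept anyway. Below is a self-contained sketch with hypothetical names (run_sketch and measure_once are illustrations, not BenchmarkTools internals; the real change is in the src/execution.jl diff further down):

    # Hypothetical sketch: sample in a loop, run the costly measurement once.
    function run_sketch(f; samples=100, measure_once=nothing)
        times = Float64[]
        for _ in 1:samples
            t0 = time_ns()
            f()
            push!(times, (time_ns() - t0) / 1e9)  # per-sample cost stays minimal
        end
        # Only one slot exists for the extra result, so running it per sample
        # would just overwrite it `samples` times.
        extra = measure_once === nothing ? nothing : measure_once(f)
        return times, extra
    end

    times, extra = run_sketch(() -> sum(rand(1000)))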

4 files changed: +71, -96 lines

src/execution.jl (57 additions & 56 deletions)

@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    trial_contents = b.samplefunc(b.quote_vals, params)
-    push!(trial, trial_contents)
-    return_val = trial_contents.return_val
+    s = b.samplefunc(b.quote_vals, params)
+    push!(trial, s[1:(end - 1)]...)
+    return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params))
+        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if p.experimental_enable_linux_perf
+        params.gcsample && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
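
The sample function now returns a plain tuple instead of the removed TrialContents struct: measurement fields first, the benchmarked expression's return value last. The order (time, gctime, memory, allocs, return_val) is taken from the generated return statement further down. A small illustration of the slicing convention, with made-up values:

    s = (2.0, 1.0, 4, 5, :val)  # (time, gctime, memory, allocs, return_val)
    s[1:(end - 1)]              # (2.0, 1.0, 4, 5), splatted into push!(trial, ...)
    s[end]                      # :val, the benchmarked expression's result
    first(s)                    # 2.0, the time estimate _lineartrial uses below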

@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = b.samplefunc(b.quote_vals, params).time
+        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
                         __evals,
                     ),
                 )
-                if $(params.experimental_enable_linux_perf)
-                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-                    __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces(
-                        $(params.linux_perf_options.events),
-                        $(params.linux_perf_options.spaces),
+                return __time, __gctime, __memory, __allocs, __return_val
+            end
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    eval(__params.linux_perf_options.events),
+                    eval(__params.linux_perf_options.spaces),
+                )
+                __linux_perf_bench = nothing
+                try
+                    __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads=eval(__params.linux_perf_options.threads),
                     )
-                    __linux_perf_bench = nothing
-                    try
-                        __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded(
-                            __linux_perf_groups;
-                            threads=$(params.linux_perf_options.threads),
-                        )
-                    catch e
-                        if e isa ErrorException &&
-                            startswith(e.msg, "perf_event_open error : ")
-                            @warn "Perf is disabled"
-                        else
-                            rethrow()
-                        end
+                catch e
+                    if e isa ErrorException &&
+                        startswith(e.msg, "perf_event_open error : ")
+                        @warn "Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
+                        # Given we now more accurately determine if perf is available can we do away with this hack?
+                    else
+                        rethrow()
                     end
+                end
 
-                    if !isnothing(__linux_perf_bench)
-                        try
-                            $(setup)
-                            BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench)
-                            # We'll just run it one time.
-                            __return_val_2 = $(invocation)
-                            BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench)
-                            # trick the compiler not to eliminate the code
-                            if rand() < 0
-                                __linux_perf_stats = __return_val_2
-                            else
-                                __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats(
-                                    __linux_perf_bench
-                                )
-                            end
-                        catch
-                            rethrow()
-                        finally
-                            close(__linux_perf_bench)
-                            $(teardown)
+                if !isnothing(__linux_perf_bench)
+                    $(setup)
+                    try
+                        $LinuxPerf.enable!(__linux_perf_bench)
+                        # We'll just run it one time.
+                        __return_val_2 = $(invocation)
+                        $LinuxPerf.disable!(__linux_perf_bench)
+                        # trick the compiler not to eliminate the code
+                        if rand() < 0
+                            __linux_perf_stats = __return_val_2
+                        else
+                            __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench)
                         end
+                        return __linux_perf_stats
+                    catch
+                        rethrow()
+                    finally
+                        close(__linux_perf_bench)
+                        $(teardown)
                     end
-                else
-                    __return_val_2 = nothing
-                    __linux_perf_stats = nothing
                 end
-                return BenchmarkTools.TrialContents(
-                    __time,
-                    __gctime,
-                    __memory,
-                    __allocs,
-                    __return_val,
-                    __return_val_2,
-                    __linux_perf_stats,
-                )
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
     )
 end
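
For context, here is how this path might be exercised from user code. The kwarg-forwarding form of run is my assumption (BenchmarkTools lets keyword arguments override Parameters fields); only the experimental_enable_linux_perf name and the linux_perf_stats field come from the diff itself:

    using BenchmarkTools

    b = @benchmarkable sum(rand(1000))
    tune!(b)
    # Assumed: the kwarg sets Parameters.experimental_enable_linux_perf,
    # which gates the single b.linux_perf_func call in _run above.
    t = run(b; experimental_enable_linux_perf=true)
    t.linux_perf_stats  # one LinuxPerf.Stats (or nothing), recorded once per run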

src/parameters.jl (1 addition & 1 deletion)

@@ -30,7 +30,7 @@ function perf_available()
     try
         opts = DEFAULT_LINUX_PERF_OPTIONS
         groups = LinuxPerf.set_default_spaces(eval(opts.events), eval(opts.spaces))
-        bench = LinuxPerf.make_bench_threaded(groups, threads = eval(opts.threads))
+        bench = LinuxPerf.make_bench_threaded(groups; threads=eval(opts.threads))
         return true
     catch
         return false
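
The only change here is keyword-argument style; in Julia the comma and semicolon call forms are equivalent, as this standalone example shows:

    g(x; threads=1) = (x, threads)
    g(1, threads=2)   # comma form: still parsed as a keyword argument
    g(1; threads=2)   # semicolon form: same call, the style preferred here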

src/trials.jl (3 additions & 27 deletions)

@@ -11,16 +11,6 @@ mutable struct Trial
     linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
 end
 
-struct TrialContents{A,B}
-    time::Float64
-    gctime::Float64
-    memory::Int
-    allocs::Int
-    return_val::A
-    return_val_2::B
-    linux_perf_stats::Union{LinuxPerf.Stats,Nothing}
-end
-
 function Trial(params::Parameters)
     return Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
 end
@@ -44,16 +34,11 @@ function Base.copy(t::Trial)
     )
 end
 
-function Base.push!(t::Trial, trial_contents::TrialContents)
-    time = trial_contents.time
-    gctime = trial_contents.gctime
-    memory = trial_contents.memory
-    allocs = trial_contents.allocs
+function Base.push!(t::Trial, time, gctime, memory, allocs)
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
-    t.linux_perf_stats = trial_contents.linux_perf_stats
     return t
 end
 
@@ -65,17 +50,8 @@ end
 
 Base.length(t::Trial) = length(t.times)
 function Base.getindex(t::Trial, i::Number)
-    return push!(
-        Trial(t.params),
-        TrialContents(
-            t.times[i],
-            t.gctimes[i],
-            t.memory,
-            t.allocs,
-            nothing,
-            nothing,
-            t.linux_perf_stats,
-        ),
+    return Trial(
+        t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats
     )
 end
 function Base.getindex(t::Trial, i)
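
To see the slimmed-down Trial API end to end (made-up numbers, mirroring the test changes below):

    using BenchmarkTools

    t = BenchmarkTools.Trial(BenchmarkTools.Parameters())
    push!(t, 2.0, 1.0, 4, 5)      # positional: time, gctime, memory, allocs
    push!(t, 21.0, 0.0, 41, 51)   # memory/allocs track the per-trial minimum
    (t.memory, t.allocs)          # (4, 5)
    t[2]                          # one-sample Trial: times == [21.0], memory == 4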

test/TrialsTests.jl (10 additions & 12 deletions)

@@ -1,21 +1,21 @@
 module TrialsTests
 
 using BenchmarkTools
-using BenchmarkTools: TrialContents
 using Test
 
 #########
 # Trial #
 #########
+
 trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2))
-push!(trial1, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
-push!(trial1, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial1, 2, 1, 4, 5)
+push!(trial1, 21, 0, 41, 51)
 
 trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15))
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
-push!(trial2, TrialContents(2.0, 1.0, 4, 5, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
+push!(trial2, 2, 1, 4, 5)
 
-push!(trial2, TrialContents(21.0, 0.0, 41, 51, nothing, nothing, nothing))
+push!(trial2, 21, 0, 41, 51)
 @test length(trial2) == 3
 deleteat!(trial2, 3)
 @test length(trial1) == length(trial2) == 2
@@ -33,10 +33,8 @@ trial2.params = trial1.params
 
 @test trial1 == trial2
 
-@test trial1[2] == push!(
-    BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)),
-    TrialContents(21.0, 0.0, 4, 5, nothing, nothing, nothing),
-)
+@test trial1[2] ==
+    push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5)
 @test trial1[1:end] == trial1
 
 @test time(trial1) == time(trial2) == 2.0
@@ -63,11 +61,11 @@ rmskew!(trial3)
 randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
 
 for _ in 1:40
-    push!(randtrial, TrialContents(rand(1.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(1:20), 1, 1, 1)
 end
 
 while mean(randtrial) <= median(randtrial)
-    push!(randtrial, TrialContents(rand(10.0:20.0), 1.0, 1, 1, nothing, nothing, nothing))
+    push!(randtrial, rand(10:20), 1, 1, 1)
 end
 
 rmskew!(randtrial)
