Run benchmark under perf if enabled

Zentrik · Zentrik · commit 8179bc5ee66c · 2024-06-20T10:41:05.000+01:00
diff --git a/src/execution.jl b/src/execution.jl
@@ -16,6 +16,7 @@ end
 
 mutable struct Benchmark
     samplefunc
+    linux_perf_func
     quote_vals
     params::Parameters
 end
@@ -124,6 +125,12 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
         push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
+
+    if params.enable_linux_perf
+        params.linux_perf_gcscrub && gcscrub()
+        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    end
+
     return trial, return_val
 end
 
@@ -519,6 +526,7 @@ function generate_benchmark_definition(
     @nospecialize
     corefunc = gensym("core")
     samplefunc = gensym("sample")
+    linux_perf_func = gensym("perf")
     type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
     signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
     signature_def = Expr(
@@ -587,7 +595,43 @@ function generate_benchmark_definition(
                 )
                 return __time, __gctime, __memory, __allocs, __return_val
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+            @noinline function $(linux_perf_func)(
+                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
+            )
+                # Runs setup, then the invocation with perf counters enabled, and returns the collected LinuxPerf.Stats. Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = $LinuxPerf.set_default_spaces(
+                    $LinuxPerf.parse_groups(__params.linux_perf_groups),
+                    __params.linux_perf_spaces,
+                )
+                __linux_perf_bench = $LinuxPerf.make_bench_threaded(
+                    __linux_perf_groups; threads=__params.linux_perf_threads
+                )
+
+                try
+                    @noinline $(setup)
+                    __evals = __params.evals
+                    # Isolate the measured code in its own anonymous function so that setup does not change the code generated for it (e.g. through different register allocation).
+                    # Unfortunately it still can: e.g. a variable defined in setup is passed into the invocation, which adds a few instructions.
+                    @noinline (function (__evals)
+                        $LinuxPerf.enable_all!()
+                        # Run the invocation once here; the loop below covers iterations 2 through __evals.
+                        @noinline __return_val_2 = $(invocation)
+                        for __iter in 2:__evals
+                            @noinline $(invocation)
+                        end
+                        $LinuxPerf.disable_all!()
+                        # Returning the first result is meant to keep the compiler from eliminating the measured code.
+                        return __return_val_2
+                    end)(__evals)
+                    return $LinuxPerf.Stats(__linux_perf_bench)
+                finally
+                    close(__linux_perf_bench)
+                    $(teardown)
+                end
+            end
+            $BenchmarkTools.Benchmark(
+                $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            )
         end,
     )
 end