|
16 | 16 |
|
17 | 17 | mutable struct Benchmark  # bundles the generated callables, captured values and parameters of one benchmark
|
18 | 18 | samplefunc  # called by `_run` as `b.samplefunc(b.quote_vals, params)`
|
| 19 | + linux_perf_func | # called by `_run` as `b.linux_perf_func(b.quote_vals, params)` when `params.enable_linux_perf` is true
19 | 20 | quote_vals  # passed as the first argument to both callables above
|
20 | 21 | params::Parameters  # the `Parameters` supplied when the `Benchmark` is constructed
|
21 | 22 | end
|
@@ -124,6 +125,12 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
|
124 | 125 | push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
|
125 | 126 | iters += 1
|
126 | 127 | end
|
| 128 | + |
| 129 | + if params.enable_linux_perf |
| 130 | + params.linux_perf_gcscrub && gcscrub() |
| 131 | + trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params) |
| 132 | + end |
| 133 | + |
127 | 134 | return trial, return_val
|
128 | 135 | end
|
129 | 136 |
|
@@ -519,6 +526,7 @@ function generate_benchmark_definition(
|
519 | 526 | @nospecialize
|
520 | 527 | corefunc = gensym("core")
|
521 | 528 | samplefunc = gensym("sample")
|
| 529 | + linux_perf_func = gensym("perf") |
522 | 530 | type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
|
523 | 531 | signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
|
524 | 532 | signature_def = Expr(
|
@@ -587,7 +595,43 @@ function generate_benchmark_definition(
|
587 | 595 | )
|
588 | 596 | return __time, __gctime, __memory, __allocs, __return_val
|
589 | 597 | end
|
590 |
| - $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) |
| 598 | + @noinline function $(linux_perf_func)( |
| 599 | + $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters |
| 600 | + ) |
| 601 | + # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 |
| 602 | + __linux_perf_groups = $LinuxPerf.set_default_spaces( |
| 603 | + $LinuxPerf.parse_groups(__params.linux_perf_groups), |
| 604 | + __params.linux_perf_spaces, |
| 605 | + ) |
| 606 | + __linux_perf_bench = $LinuxPerf.make_bench_threaded( |
| 607 | + __linux_perf_groups; threads=__params.linux_perf_threads |
| 608 | + ) |
| 609 | + |
| 610 | + try |
| 611 | + @noinline $(setup) |
| 612 | + __evals = __params.evals |
| 613 | + # Run the measured code in its own non-inlined closure so that e.g. `setup` does not change the code generated for it (for instance through different register allocation). |
| 614 | + # Unfortunately the isolation is imperfect: e.g. a variable defined in `setup` is passed into the invocation, which adds a few instructions. |
| 615 | + @noinline (function (__evals) |
| 616 | + $LinuxPerf.enable_all!() |
| 617 | + # Run the invocation once here, then once more for each iteration of `2:__evals` below. |
| 618 | + @noinline __return_val_2 = $(invocation) |
| 619 | + for __iter in 2:__evals |
| 620 | + @noinline $(invocation) |
| 621 | + end |
| 622 | + $LinuxPerf.disable_all!() |
| 623 | + # Return the first result so that the compiler cannot eliminate the measured code. |
| 624 | + return __return_val_2 |
| 625 | + end)(__evals) |
| 626 | + return $LinuxPerf.Stats(__linux_perf_bench) |
| 627 | + finally |
| 628 | + close(__linux_perf_bench) |
| 629 | + $(teardown) |
| 630 | + end |
| 631 | + end |
| 632 | + $BenchmarkTools.Benchmark( |
| 633 | + $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) |
| 634 | + ) |
591 | 635 | end,
|
592 | 636 | )
|
593 | 637 | end
|
|
0 commit comments