|
16 | 16 |
|
17 | 17 | mutable struct Benchmark
|
18 | 18 | samplefunc
|
| 19 | + linux_perf_func |
19 | 20 | quote_vals
|
20 | 21 | params::Parameters
|
21 | 22 | end
|
@@ -113,15 +114,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
|
113 | 114 | start_time = Base.time()
|
114 | 115 | trial = Trial(params)
|
115 | 116 | params.gcsample && gcscrub()
|
116 |
| - trial_contents = b.samplefunc(b.quote_vals, params) |
117 |
| - push!(trial, trial_contents) |
118 |
| - return_val = trial_contents.return_val |
| 117 | + s = b.samplefunc(b.quote_vals, params) |
| 118 | + push!(trial, s[1:(end - 1)]...) |
| 119 | + return_val = s[end] |
119 | 120 | iters = 2
|
120 | 121 | while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
|
121 | 122 | params.gcsample && gcscrub()
|
122 |
| - push!(trial, b.samplefunc(b.quote_vals, params)) |
| 123 | + push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) |
123 | 124 | iters += 1
|
124 | 125 | end
|
| 126 | + |
| 127 | + if p.experimental_enable_linux_perf |
| 128 | + params.gcsample && gcscrub() |
| 129 | + trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params) |
| 130 | + end |
| 131 | + |
125 | 132 | return trial, return_val
|
126 | 133 | end
|
127 | 134 |
|
@@ -185,7 +192,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
|
185 | 192 | for evals in eachindex(estimates)
|
186 | 193 | params.gcsample && gcscrub()
|
187 | 194 | params.evals = evals
|
188 |
| - estimates[evals] = b.samplefunc(b.quote_vals, params).time |
| 195 | + estimates[evals] = first(b.samplefunc(b.quote_vals, params)) |
189 | 196 | completed += 1
|
190 | 197 | ((time() - start_time) > params.seconds) && break
|
191 | 198 | end
|
@@ -513,6 +520,7 @@ function generate_benchmark_definition(
|
513 | 520 | @nospecialize
|
514 | 521 | corefunc = gensym("core")
|
515 | 522 | samplefunc = gensym("sample")
|
| 523 | + linux_perf_func = gensym("perf") |
516 | 524 | type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
|
517 | 525 | signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
|
518 | 526 | signature_def = Expr(
|
@@ -579,64 +587,57 @@ function generate_benchmark_definition(
|
579 | 587 | __evals,
|
580 | 588 | ),
|
581 | 589 | )
|
582 |
| - if $(params.experimental_enable_linux_perf) |
583 |
| - # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 |
584 |
| - __linux_perf_groups = BenchmarkTools.LinuxPerf.set_default_spaces( |
585 |
| - $(params.linux_perf_options.events), |
586 |
| - $(params.linux_perf_options.spaces), |
| 590 | + return __time, __gctime, __memory, __allocs, __return_val |
| 591 | + end |
| 592 | + @noinline function $(linux_perf_func)( |
| 593 | + $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters |
| 594 | + ) |
| 595 | + # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 |
| 596 | + __linux_perf_groups = $LinuxPerf.set_default_spaces( |
| 597 | + eval(__params.linux_perf_options.events), |
| 598 | + eval(__params.linux_perf_options.spaces), |
| 599 | + ) |
| 600 | + __linux_perf_bench = nothing |
| 601 | + try |
| 602 | + __linux_perf_bench = $LinuxPerf.make_bench_threaded( |
| 603 | + __linux_perf_groups; |
| 604 | + threads=eval(__params.linux_perf_options.threads), |
587 | 605 | )
|
588 |
| - __linux_perf_bench = nothing |
589 |
| - try |
590 |
| - __linux_perf_bench = BenchmarkTools.LinuxPerf.make_bench_threaded( |
591 |
| - __linux_perf_groups; |
592 |
| - threads=$(params.linux_perf_options.threads), |
593 |
| - ) |
594 |
| - catch e |
595 |
| - if e isa ErrorException && |
596 |
| - startswith(e.msg, "perf_event_open error : ") |
597 |
| - @warn "Perf is disabled" |
598 |
| - else |
599 |
| - rethrow() |
600 |
| - end |
| 606 | + catch e |
| 607 | + if e isa ErrorException && |
| 608 | + startswith(e.msg, "perf_event_open error : ") |
| 609 | + @warn "Perf is disabled" # Really, we only want to warn here if we defaulted to running with perf; otherwise we should just throw. |
| 610 | + # Given that we now determine more accurately whether perf is available, can we do away with this hack? |
| 611 | + else |
| 612 | + rethrow() |
601 | 613 | end
|
| 614 | + end |
602 | 615 |
|
603 |
| - if !isnothing(__linux_perf_bench) |
604 |
| - try |
605 |
| - $(setup) |
606 |
| - BenchmarkTools.LinuxPerf.enable!(__linux_perf_bench) |
607 |
| - # We'll just run it one time. |
608 |
| - __return_val_2 = $(invocation) |
609 |
| - BenchmarkTools.LinuxPerf.disable!(__linux_perf_bench) |
610 |
| - # trick the compiler not to eliminate the code |
611 |
| - if rand() < 0 |
612 |
| - __linux_perf_stats = __return_val_2 |
613 |
| - else |
614 |
| - __linux_perf_stats = BenchmarkTools.LinuxPerf.Stats( |
615 |
| - __linux_perf_bench |
616 |
| - ) |
617 |
| - end |
618 |
| - catch |
619 |
| - rethrow() |
620 |
| - finally |
621 |
| - close(__linux_perf_bench) |
622 |
| - $(teardown) |
| 616 | + if !isnothing(__linux_perf_bench) |
| 617 | + $(setup) |
| 618 | + try |
| 619 | + $LinuxPerf.enable!(__linux_perf_bench) |
| 620 | + # We'll just run it one time. |
| 621 | + __return_val_2 = $(invocation) |
| 622 | + $LinuxPerf.disable!(__linux_perf_bench) |
| 623 | + # trick the compiler not to eliminate the code |
| 624 | + if rand() < 0 |
| 625 | + __linux_perf_stats = __return_val_2 |
| 626 | + else |
| 627 | + __linux_perf_stats = $LinuxPerf.Stats(__linux_perf_bench) |
623 | 628 | end
|
| 629 | + return __linux_perf_stats |
| 630 | + catch |
| 631 | + rethrow() |
| 632 | + finally |
| 633 | + close(__linux_perf_bench) |
| 634 | + $(teardown) |
624 | 635 | end
|
625 |
| - else |
626 |
| - __return_val_2 = nothing |
627 |
| - __linux_perf_stats = nothing |
628 | 636 | end
|
629 |
| - return BenchmarkTools.TrialContents( |
630 |
| - __time, |
631 |
| - __gctime, |
632 |
| - __memory, |
633 |
| - __allocs, |
634 |
| - __return_val, |
635 |
| - __return_val_2, |
636 |
| - __linux_perf_stats, |
637 |
| - ) |
638 | 637 | end
|
639 |
| - $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) |
| 638 | + $BenchmarkTools.Benchmark( |
| 639 | + $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) |
| 640 | + ) |
640 | 641 | end,
|
641 | 642 | )
|
642 | 643 | end
|
|
0 commit comments