@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
9
9
10
10
"""
    Benchmark

Container for a compiled benchmark definition.

Fields (in constructor order):
- `samplefunc`: generated function that runs one batch of timed samples and
  returns the measurements plus the benchmarked expression's return value.
- `linux_perf_func`: generated function that re-runs the workload once under
  Linux `perf` counters and returns the collected stats.
- `quote_vals`: interpolated (quoted) values passed through to the generated
  functions.
- `params`: the [`Parameters`](@ref) governing how this benchmark is executed.

Fields other than `params` are intentionally left untyped: they hold
per-benchmark generated functions/values whose concrete types differ for
every benchmark definition.
"""
mutable struct Benchmark
    samplefunc
    linux_perf_func
    quote_vals
    params::Parameters
end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
106
107
start_time = Base. time ()
107
108
trial = Trial (params)
108
109
params. gcsample && gcscrub ()
109
- trial_contents = b. samplefunc (b. quote_vals, params)
110
- push! (trial, trial_contents )
111
- return_val = trial_contents . return_val
110
+ s = b. samplefunc (b. quote_vals, params)
111
+ push! (trial, s[ 1 : ( end - 1 )] . .. )
112
+ return_val = s[ end ]
112
113
iters = 2
113
114
while (Base. time () - start_time) < params. seconds && iters ≤ params. samples
114
115
params. gcsample && gcscrub ()
115
- push! (trial, b. samplefunc (b. quote_vals, params))
116
+ push! (trial, b. samplefunc (b. quote_vals, params)[ 1 : ( end - 1 )] . .. )
116
117
iters += 1
117
118
end
119
+
120
+ if p. experimental_enable_linux_perf
121
+ params. gcsample && gcscrub ()
122
+ trial. linux_perf_stats = b. linux_perf_func (b. quote_vals, params)
123
+ end
124
+
118
125
return trial, return_val
119
126
end
120
127
@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
178
185
for evals in eachindex (estimates)
179
186
params. gcsample && gcscrub ()
180
187
params. evals = evals
181
- estimates[evals] = b. samplefunc (b. quote_vals, params). time
188
+ estimates[evals] = first ( b. samplefunc (b. quote_vals, params))
182
189
completed += 1
183
190
((time () - start_time) > params. seconds) && break
184
191
end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
506
513
@nospecialize
507
514
corefunc = gensym (" core" )
508
515
samplefunc = gensym (" sample" )
516
+ linux_perf_func = gensym (" perf" )
509
517
type_vars = [gensym () for i in 1 : (length (quote_vars) + length (setup_vars))]
510
518
signature = Expr (:call , corefunc, quote_vars... , setup_vars... )
511
519
signature_def = Expr (
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
572
580
__evals,
573
581
),
574
582
)
575
- if $ (params. experimental_enable_linux_perf)
576
- # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
577
- __linux_perf_groups = BenchmarkTools. LinuxPerf. set_default_spaces (
578
- $ (params. linux_perf_options. events),
579
- $ (params. linux_perf_options. spaces),
583
+ return __time, __gctime, __memory, __allocs, __return_val
584
+ end
585
+ @noinline function $ (linux_perf_func)(
586
+ $ (Expr (:tuple , quote_vars... )), __params:: $BenchmarkTools.Parameters
587
+ )
588
+ # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
589
+ __linux_perf_groups = $ LinuxPerf. set_default_spaces (
590
+ eval (__params. linux_perf_options. events),
591
+ eval (__params. linux_perf_options. spaces),
592
+ )
593
+ __linux_perf_bench = nothing
594
+ try
595
+ __linux_perf_bench = $ LinuxPerf. make_bench_threaded (
596
+ __linux_perf_groups;
597
+ threads= eval (__params. linux_perf_options. threads),
580
598
)
581
- __linux_perf_bench = nothing
582
- try
583
- __linux_perf_bench = BenchmarkTools. LinuxPerf. make_bench_threaded (
584
- __linux_perf_groups;
585
- threads= $ (params. linux_perf_options. threads),
586
- )
587
- catch e
588
- if e isa ErrorException &&
589
- startswith (e. msg, " perf_event_open error : " )
590
- @warn " Perf is disabled"
591
- else
592
- rethrow ()
593
- end
599
+ catch e
600
+ if e isa ErrorException &&
601
+ startswith (e. msg, " perf_event_open error : " )
602
+ @warn " Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
603
+ # Given we now more accurately determine if perf is available can we do away with this hack?
604
+ else
605
+ rethrow ()
594
606
end
607
+ end
595
608
596
- if ! isnothing (__linux_perf_bench)
597
- try
598
- $ (setup)
599
- BenchmarkTools. LinuxPerf. enable! (__linux_perf_bench)
600
- # We'll just run it one time.
601
- __return_val_2 = $ (invocation)
602
- BenchmarkTools. LinuxPerf. disable! (__linux_perf_bench)
603
- # trick the compiler not to eliminate the code
604
- if rand () < 0
605
- __linux_perf_stats = __return_val_2
606
- else
607
- __linux_perf_stats = BenchmarkTools. LinuxPerf. Stats (
608
- __linux_perf_bench
609
- )
610
- end
611
- catch
612
- rethrow ()
613
- finally
614
- close (__linux_perf_bench)
615
- $ (teardown)
609
+ if ! isnothing (__linux_perf_bench)
610
+ $ (setup)
611
+ try
612
+ $ LinuxPerf. enable! (__linux_perf_bench)
613
+ # We'll just run it one time.
614
+ __return_val_2 = $ (invocation)
615
+ $ LinuxPerf. disable! (__linux_perf_bench)
616
+ # trick the compiler not to eliminate the code
617
+ if rand () < 0
618
+ __linux_perf_stats = __return_val_2
619
+ else
620
+ __linux_perf_stats = $ LinuxPerf. Stats (__linux_perf_bench)
616
621
end
622
+ return __linux_perf_stats
623
+ catch
624
+ rethrow ()
625
+ finally
626
+ close (__linux_perf_bench)
627
+ $ (teardown)
617
628
end
618
- else
619
- __return_val_2 = nothing
620
- __linux_perf_stats = nothing
621
629
end
622
- return BenchmarkTools. TrialContents (
623
- __time,
624
- __gctime,
625
- __memory,
626
- __allocs,
627
- __return_val,
628
- __return_val_2,
629
- __linux_perf_stats,
630
- )
631
630
end
632
- $ BenchmarkTools. Benchmark ($ (samplefunc), $ (quote_vals), $ (params))
631
+ $ BenchmarkTools. Benchmark (
632
+ $ (samplefunc), $ (linux_perf_func), $ (quote_vals), $ (params)
633
+ )
633
634
end ,
634
635
)
635
636
end
0 commit comments