Skip to content

Commit 4fca053

Browse files
committed
Introduce hooks to allow more customisable benchmarking
1 parent 468b607 commit 4fca053

File tree

13 files changed

+370
-303
lines changed

13 files changed

+370
-303
lines changed

.buildkite/pipeline.yml

Lines changed: 0 additions & 12 deletions
This file was deleted.

.github/workflows/CI.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
os: ubuntu-22.04
3636
steps:
3737
- uses: actions/checkout@v4
38-
- uses: julia-actions/setup-julia@v1
38+
- uses: julia-actions/setup-julia@v2
3939
with:
4040
version: ${{ matrix.version }}
4141
arch: ${{ matrix.arch }}
@@ -51,8 +51,6 @@ jobs:
5151
${{ runner.os }}-
5252
- uses: julia-actions/julia-buildpkg@v1
5353
- uses: julia-actions/julia-runtest@v1
54-
env:
55-
TEST_PERF_INTEGRATION: false
5654
- uses: julia-actions/julia-processcoverage@v1
5755
- uses: codecov/codecov-action@v3
5856
with:

Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ version = "1.6.0"
55
[deps]
66
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
77
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
8-
LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
98
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
109
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1110
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
@@ -17,7 +16,6 @@ Aqua = "0.8"
1716
Compat = ">= 4.11.0"
1817
JSON = "0.18, 0.19, 0.20, 0.21"
1918
JuliaFormatter = "1"
20-
LinuxPerf = "0.3.8"
2119
Logging = "<0.0.1, 1"
2220
Printf = "<0.0.1, 1"
2321
Profile = "<0.0.1, 1"

docs/src/manual.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,6 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
8585
- `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.
8686
- `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
8787
- `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
88-
- `enable_linux_perf`: If `true`, profile using perf `evals` times. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = false`.
89-
- `linux_perf_groups`: The event groups you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = "(instructions,branch-instructions)"`.
90-
- `linux_perf_spaces`: Which of user, kernel and hypervisor space you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = (true, false, false)`.
91-
- `linux_perf_threads`: If `true`, all threads are profiled; otherwise only the thread which starts the benchmark is profiled. See [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = true`.
92-
- `linux_perf_gcscrub`: If `true`, run `gc()` before executing the profiling run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = true`.
9388

9489
To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:
9590

src/BenchmarkTools.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ using Printf
1010
using Profile
1111
using Compat: pkgversion, @noinline
1212

13-
using LinuxPerf: LinuxPerf
14-
1513
##############
1614
# Parameters #
1715
##############

src/execution.jl

Lines changed: 101 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ end
1616

1717
mutable struct Benchmark
1818
samplefunc
19-
linux_perf_func
19+
customisable_func
2020
quote_vals
2121
params::Parameters
2222
end
@@ -110,26 +110,56 @@ end
110110
function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
111111
params = Parameters(p; kwargs...)
112112
@assert params.seconds > 0.0 "time limit must be greater than 0.0"
113+
@assert params.enable_customisable_func in (:FALSE, :ALL, :LAST) "invalid value $(params.enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST"
114+
@assert !(
115+
params.run_customisable_func_only && params.enable_customisable_func == :FALSE
116+
) "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE"
117+
if warmup #warmup sample
118+
params.run_customisable_func_only &&
119+
b.samplefunc(b.quote_vals, Parameters(params; evals=1))
120+
!params.run_customisable_func_only &&
121+
b.customisable_func(b.quote_vals, Parameters(params; evals=1))
122+
end
123+
trial = Trial(params)
124+
if params.enable_customisable_func == :ALL
125+
trial.customisable_result = []
126+
trial.customisable_result_for_every_sample = true
127+
end
113128
params.gctrial && gcscrub()
114129
start_time = Base.time()
115-
trial = Trial(params)
116-
if warmup
117-
b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
130+
131+
return_val = nothing
132+
if !params.run_customisable_func_only
133+
s = b.samplefunc(b.quote_vals, params)
134+
push!(trial, s[1:(end - 1)]...)
135+
return_val = s[end]
136+
end
137+
if params.enable_customisable_func == :ALL
138+
params.customisable_gcsample && gcscrub()
139+
push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
118140
end
119-
params.gcsample && gcscrub()
120-
s = b.samplefunc(b.quote_vals, params)
121-
push!(trial, s[1:(end - 1)]...)
122-
return_val = s[end]
141+
123142
iters = 2
124143
while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
125144
params.gcsample && gcscrub()
126145
push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
146+
147+
if params.enable_customisable_func == :ALL
148+
params.customisable_gcsample && gcscrub()
149+
push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
150+
end
151+
127152
iters += 1
128153
end
129154

130-
if params.enable_linux_perf
131-
params.linux_perf_gcscrub && gcscrub()
132-
trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
155+
if params.enable_customisable_func !== :FALSE
156+
params.customisable_gcsample && gcscrub()
157+
s = b.customisable_func(b.quote_vals, params)
158+
trial.customisable_result = s[1]
159+
160+
if params.run_customisable_func_only
161+
return_val = s[end]
162+
end
133163
end
134164

135165
return trial, return_val
@@ -514,6 +544,24 @@ macro benchmarkable(args...)
514544
end
515545
end
516546

547+
samplefunc_prehook() = (Base.gc_num(), time_ns())
548+
samplefunc_posthook = samplefunc_prehook
549+
function samplefunc_sample_result(__params, _, prehook_result, posthook_result)
550+
__evals = __params.evals
551+
__sample_time = posthook_result[2] - prehook_result[2]
552+
__gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])
553+
554+
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
555+
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
556+
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
557+
__allocs = Int(
558+
Base.fld(
559+
__gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc,
560+
__evals,
561+
),
562+
)
563+
return __time, __gctime, __memory, __allocs
564+
end
517565
# `eval` an expression that forcibly defines the specified benchmark at
518566
# top-level in order to allow transfer of locally-scoped variables into
519567
# benchmark scope.
@@ -527,7 +575,7 @@ function generate_benchmark_definition(
527575
@nospecialize
528576
corefunc = gensym("core")
529577
samplefunc = gensym("sample")
530-
linux_perf_func = gensym("perf")
578+
customisable_func = gensym("customisable")
531579
type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
532580
signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
533581
signature_def = Expr(
@@ -571,71 +619,70 @@ function generate_benchmark_definition(
571619
@noinline function $(samplefunc)(
572620
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
573621
)
574-
$(setup)
575-
__evals = __params.evals
576-
__gc_start = Base.gc_num()
577-
__start_time = time_ns()
578-
__return_val = $(invocation)
579-
for __iter in 2:__evals
580-
$(invocation)
581-
end
582-
__sample_time = time_ns() - __start_time
583-
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
584-
$(teardown)
585-
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
586-
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
587-
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
588-
__allocs = Int(
589-
Base.fld(
590-
__gcdiff.malloc +
591-
__gcdiff.realloc +
592-
__gcdiff.poolalloc +
593-
__gcdiff.bigalloc,
594-
__evals,
595-
),
622+
$BenchmarkTools.@noinline $(setup)
623+
# Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
624+
# Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
625+
__prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
626+
function (__evals)
627+
prehook_result = $BenchmarkTools.samplefunc_prehook()
628+
# We'll run it evals times.
629+
$BenchmarkTools.@noinline __return_val_2 = $(invocation)
630+
for __iter in 2:__evals
631+
$BenchmarkTools.@noinline $(invocation)
632+
end
633+
posthook_result = $BenchmarkTools.samplefunc_posthook()
634+
# trick the compiler not to eliminate the code
635+
return prehook_result, posthook_result, __return_val_2
636+
end
637+
)(
638+
__params.evals
596639
)
597-
return __time, __gctime, __memory, __allocs, __return_val
640+
$(teardown)
641+
return $BenchmarkTools.samplefunc_sample_result(
642+
__params, nothing, __prehook_result, __posthook_result
643+
)...,
644+
__return_val
598645
end
599-
@noinline function $(linux_perf_func)(
646+
@noinline function $(customisable_func)(
600647
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
601648
)
602-
# Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
603-
__linux_perf_groups = $LinuxPerf.set_default_spaces(
604-
$LinuxPerf.parse_groups(__params.linux_perf_groups),
605-
__params.linux_perf_spaces,
606-
)
607-
__linux_perf_bench = $LinuxPerf.make_bench_threaded(
608-
__linux_perf_groups; threads=__params.linux_perf_threads
609-
)
610-
649+
local __setup_prehook_result
611650
try
651+
__setup_prehook_result = $BenchmarkTools.@noinline __params.setup_prehook(
652+
__params
653+
)
612654
$BenchmarkTools.@noinline $(setup)
613-
__evals = __params.evals
614655
# Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
615656
# Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
616-
$BenchmarkTools.@noinline (
657+
__prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
617658
function (__evals)
618-
$LinuxPerf.enable_all!()
659+
prehook_result = __params.prehook()
619660
# We'll run it evals times.
620661
$BenchmarkTools.@noinline __return_val_2 = $(invocation)
621662
for __iter in 2:__evals
622663
$BenchmarkTools.@noinline $(invocation)
623664
end
624-
$LinuxPerf.disable_all!()
665+
posthook_result = __params.posthook()
625666
# trick the compiler not to eliminate the code
626-
return __return_val_2
667+
return prehook_result, posthook_result, __return_val_2
627668
end
628669
)(
629-
__evals
670+
__params.evals
630671
)
631-
return $LinuxPerf.Stats(__linux_perf_bench)
672+
return __params.sample_result(
673+
__params,
674+
__setup_prehook_result,
675+
__prehook_result,
676+
__posthook_result,
677+
),
678+
__return_val
632679
finally
633-
close(__linux_perf_bench)
634680
$(teardown)
681+
__params.teardown_posthook(__params, __setup_prehook_result)
635682
end
636683
end
637684
$BenchmarkTools.Benchmark(
638-
$(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
685+
$(samplefunc), $(customisable_func), $(quote_vals), $(params)
639686
)
640687
end,
641688
)

0 commit comments

Comments
 (0)