Skip to content

Commit 7d37634

Browse files
authored
Don't tune manually set evals parameter (#318)
* Add evals_set field to benchmark parameters for manually set evals
* Better testing and comments in docs
* Forgot stuff
* Replace 1 with true for evals_set
1 parent b3f4078 commit 7d37634

File tree

4 files changed

+53
-8
lines changed

4 files changed

+53
-8
lines changed

docs/src/manual.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
7979

8080
- `samples`: The number of samples to take. Execution will end if this many samples have been collected. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.samples = 10000`.
8181
- `seconds`: The number of seconds budgeted for the benchmarking process. The trial will terminate if this time is exceeded (regardless of `samples`), but at least one sample will always be taken. In practice, actual runtime can overshoot the budget by the duration of a sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.seconds = 5`.
82-
- `evals`: The number of evaluations per sample. For best results, this should be kept consistent between trials. A good guess for this value can be automatically set on a benchmark via `tune!`, but using `tune!` can be less consistent than setting `evals` manually. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.evals = 1`.
82+
- `evals`: The number of evaluations per sample. For best results, this should be kept consistent between trials. A good guess for this value can be automatically set on a benchmark via `tune!`, but using `tune!` can be less consistent than setting `evals` manually (which bypasses tuning). Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.evals = 1`. If the function you study mutates its input, it is probably a good idea to set `evals=1` manually.
8383
- `overhead`: The estimated loop overhead per evaluation in nanoseconds, which is automatically subtracted from every sample time measurement. The default value is `BenchmarkTools.DEFAULT_PARAMETERS.overhead = 0`. `BenchmarkTools.estimate_overhead` can be called to determine this value empirically (which can then be set as the default value, if you want).
8484
- `gctrial`: If `true`, run `gc()` before executing this benchmark's trial. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gctrial = true`.
8585
- `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.

src/execution.jl

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,17 @@ tune!(group::BenchmarkGroup; verbose::Bool = false, pad = "", kwargs...) =
243243
tune!(b::Benchmark, p::Parameters = b.params; verbose::Bool = false, pad = "", kwargs...)
244244
245245
Tune a `Benchmark` instance.
246+
247+
If the number of evals in the parameters `p` has been set manually, this function does nothing.
246248
"""
247249
function tune!(b::Benchmark, p::Parameters = b.params;
248250
progressid=nothing, nleaves=NaN, ndone=NaN, # ignored
249251
verbose::Bool = false, pad = "", kwargs...)
250-
warmup(b, verbose = false)
251-
estimate = ceil(Int, minimum(lineartrial(b, p; kwargs...)))
252-
b.params.evals = guessevals(estimate)
252+
if !p.evals_set
253+
warmup(b, verbose=false)
254+
estimate = ceil(Int, minimum(lineartrial(b, p; kwargs...)))
255+
b.params.evals = guessevals(estimate)
256+
end
253257
return b
254258
end
255259

@@ -268,6 +272,9 @@ function prunekwargs(args...)
268272
for ex in params
269273
if isa(ex, Expr) && ex.head == :(=)
270274
ex.head = :kw
275+
if ex.args[1] == :evals
276+
push!(params, :(evals_set = true))
277+
end
271278
end
272279
end
273280
if isa(core, Expr) && core.head == :kw

src/parameters.jl

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,26 @@ mutable struct Parameters
99
seconds::Float64
1010
samples::Int
1111
evals::Int
12+
evals_set::Bool
1213
overhead::Float64
1314
gctrial::Bool
1415
gcsample::Bool
1516
time_tolerance::Float64
1617
memory_tolerance::Float64
1718
end
1819

19-
const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, 0, true, false, 0.05, 0.01)
20+
const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01)
2021

2122
function Parameters(; seconds = DEFAULT_PARAMETERS.seconds,
2223
samples = DEFAULT_PARAMETERS.samples,
2324
evals = DEFAULT_PARAMETERS.evals,
25+
evals_set = DEFAULT_PARAMETERS.evals_set,
2426
overhead = DEFAULT_PARAMETERS.overhead,
2527
gctrial = DEFAULT_PARAMETERS.gctrial,
2628
gcsample = DEFAULT_PARAMETERS.gcsample,
2729
time_tolerance = DEFAULT_PARAMETERS.time_tolerance,
2830
memory_tolerance = DEFAULT_PARAMETERS.memory_tolerance)
29-
return Parameters(seconds, samples, evals, overhead, gctrial,
31+
return Parameters(seconds, samples, evals, evals_set, overhead, gctrial,
3032
gcsample, time_tolerance, memory_tolerance)
3133
end
3234

@@ -57,8 +59,17 @@ function Base.:(==)(a::Parameters, b::Parameters)
5759
a.memory_tolerance == b.memory_tolerance
5860
end
5961

60-
Base.copy(p::Parameters) = Parameters(p.seconds, p.samples, p.evals, p.overhead, p.gctrial,
61-
p.gcsample, p.time_tolerance, p.memory_tolerance)
62+
Base.copy(p::Parameters) = Parameters(
63+
p.seconds,
64+
p.samples,
65+
p.evals,
66+
p.evals_set,
67+
p.overhead,
68+
p.gctrial,
69+
p.gcsample,
70+
p.time_tolerance,
71+
p.memory_tolerance
72+
)
6273

6374
function loadparams!(a::Parameters, b::Parameters, fields...)
6475
fields = isempty(fields) ? fieldnames(Parameters) : fields

test/ExecutionTests.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,27 @@ loadparams!(oldgroups, params(groups))
6363

6464
@test oldgroups == oldgroupscopy == groups
6565

66+
# Explicitly set evals should not get tuned
67+
68+
b = @benchmarkable sin(1) evals=1
69+
tune!(b)
70+
@test params(b).evals == 1
71+
72+
b = @benchmarkable sin(1) evals=10
73+
tune!(b)
74+
@test params(b).evals == 10
75+
76+
function test_length_and_push!(x::AbstractVector)
77+
length(x) == 2 || error("setup not correctly executed")
78+
push!(x, randn())
79+
end
80+
81+
b_fail = @benchmarkable test_length_and_push!(y) setup=(y=randn(2))
82+
@test_throws Exception tune!(b_fail)
83+
84+
b_pass = @benchmarkable test_length_and_push!(y) setup=(y=randn(2)) evals=1
85+
@test tune!(b_pass) isa BenchmarkTools.Benchmark
86+
6687
#######
6788
# run #
6889
#######
@@ -81,6 +102,12 @@ testexpected(run(groups["sin"][first(sizes)]; seconds = 1, gctrial = false, time
81102

82103
testexpected(run(groups["sum"][first(sizes)], BenchmarkTools.DEFAULT_PARAMETERS))
83104

105+
# Mutating benchmark
106+
107+
b_pass = @benchmarkable test_length_and_push!(y) setup=(y=randn(2)) evals=1
108+
tune!(b_pass)
109+
@test run(b_pass) isa BenchmarkTools.Trial
110+
84111
###########
85112
# warmup #
86113
###########

0 commit comments

Comments (0)