Commit c1db1e6

DilumAluthge authored and Zentrik committed
Add experimental support for perf (via LinuxPerf.jl)
1 parent 255854d commit c1db1e6
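
The diff below wires LinuxPerf.jl into the sample function that BenchmarkTools generates. Pulled out of the generated code, the measurement pattern it uses looks roughly like the following sketch; it only reuses the LinuxPerf.jl calls that appear in the diff (parse_pstats_options, set_default_spaces, make_bench_threaded, enable!, disable!, Stats), and everything else (the helper name, argument handling) is an assumption for illustration, not part of this commit.

    import LinuxPerf

    # Hedged sketch of the per-sample perf measurement the generated code performs.
    function measure_once(f)
        opts = LinuxPerf.parse_pstats_options([])   # default events/spaces/threads, as in the diff
        groups = LinuxPerf.set_default_spaces(opts.events, opts.spaces)
        bench = LinuxPerf.make_bench_threaded(groups; threads = opts.threads)
        LinuxPerf.enable!(bench)
        ret = f()                                   # the workload runs exactly once under perf
        LinuxPerf.disable!(bench)
        return ret, LinuxPerf.Stats(bench)
    end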

4 files changed: +84 additions, -43 deletions


Project.toml

Lines changed: 3 additions & 0 deletions
@@ -4,9 +4,11 @@ version = "1.4.0"
 
 [deps]
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
@@ -20,6 +22,7 @@ Profile = "<0.0.1, 1"
 Statistics = "<0.0.1, 1"
 Test = "<0.0.1, 1"
 UUIDs = "<0.0.1, 1"
+LinuxPerf = "= 0.3.4"
 julia = "1"
 
 [extras]
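
For anyone reproducing this environment by hand, the keyword form of Pkg.add can install the exact pin from the [compat] block above; the commands below are illustrative only and are not part of the commit.

    using Pkg
    Pkg.add(name = "LinuxPerf", version = v"0.3.4")  # matches the `LinuxPerf = "= 0.3.4"` compat pin
    Pkg.add("Random")                                # stdlib also added to [deps]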

src/BenchmarkTools.jl

Lines changed: 7 additions & 0 deletions
@@ -9,6 +9,13 @@ using UUIDs: uuid4
 using Printf
 using Profile
 
+import LinuxPerf
+import Random
+
+# TODO: delete the following line once https://github.com/JuliaPerf/LinuxPerf.jl/pull/32
+# is merged and a new release of LinuxPerf has been made.
+const parse_groups = LinuxPerf.parse_groups
+
 const BENCHMARKTOOLS_VERSION = v"1.0.0"
 
 ##############

src/execution.jl

Lines changed: 58 additions & 42 deletions
@@ -107,8 +107,8 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
     trial = Trial(params)
     params.gcsample && gcscrub()
     s = b.samplefunc(b.quote_vals, params)
-    push!(trial, s[1:(end - 1)]...)
-    return_val = s[end]
+    push!(trial, s[1:end-1]...)
+    return_val = s.__return_val
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
@@ -533,50 +533,66 @@ function generate_benchmark_definition(
         core_body = :($(core); $(returns))
     end
     @static if isdefined(Base, :donotdelete)
-        invocation = :(
-            let x = $invocation
-                Base.donotdelete(x)
-                x
-            end
-        )
+        invocation = :(let x = $invocation
+            Base.donotdelete(x)
+            x
+        end)
     end
-    return Core.eval(
-        eval_module,
-        quote
-            @noinline $(signature_def) = begin
-                $(core_body)
+    experimental_enable_linux_perf = true # TODO: take this as input from the user
+    # TODO: let the user actually provide these options.
+    linux_perf_opts = LinuxPerf.parse_pstats_options([])
+    return Core.eval(eval_module, quote
+        @noinline $(signature_def) = begin $(core_body) end
+        @noinline function $(samplefunc)($(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters)
+            $(setup)
+            __evals = __params.evals
+            __gc_start = Base.gc_num()
+            __start_time = time_ns()
+            __return_val = $(invocation)
+            for __iter in 2:__evals
+                $(invocation)
             end
-            @noinline function $(samplefunc)(
-                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
-            )
-                $(setup)
-                __evals = __params.evals
-                __gc_start = Base.gc_num()
-                __start_time = time_ns()
-                __return_val = $(invocation)
-                for __iter in 2:__evals
-                    $(invocation)
-                end
-                __sample_time = time_ns() - __start_time
-                __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
-                $(teardown)
-                __time = max((__sample_time / __evals) - __params.overhead, 0.001)
-                __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
-                __memory = Int(Base.fld(__gcdiff.allocd, __evals))
-                __allocs = Int(
-                    Base.fld(
-                        __gcdiff.malloc +
-                        __gcdiff.realloc +
-                        __gcdiff.poolalloc +
-                        __gcdiff.bigalloc,
-                        __evals,
-                    ),
+            __sample_time = time_ns() - __start_time
+            __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
+            $(teardown)
+            __time = max((__sample_time / __evals) - __params.overhead, 0.001)
+            __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
+            __memory = Int(Base.fld(__gcdiff.allocd, __evals))
+            __allocs = Int(Base.fld(__gcdiff.malloc + __gcdiff.realloc +
+                                    __gcdiff.poolalloc + __gcdiff.bigalloc,
+                                    __evals))
+            if $(experimental_enable_linux_perf)
+                # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                __linux_perf_groups = LinuxPerf.set_default_spaces(
+                    $(linux_perf_opts.events),
+                    $(linux_perf_opts.spaces),
+                )
+                __linux_perf_bench = LinuxPerf.make_bench_threaded(
+                    __linux_perf_groups;
+                    threads = $(linux_perf_opts.threads),
                 )
-                return __time, __gctime, __memory, __allocs, __return_val
+                LinuxPerf.enable!(__linux_perf_bench)
+                # We'll just run it one time.
+                __return_val_2 = $(invocation)
+                LinuxPerf.disable!(__linux_perf_bench)
+                # trick the compiler not to eliminate the code
+                if rand() < 0
+                    __linux_perf_stats = __return_val_2
+                else
+                    __linux_perf_stats = LinuxPerf.Stats(__linux_perf_bench)
+                end
             end
-            $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
-        end,
-    )
+            return (;
+                __time,
+                __gctime,
+                __memory,
+                __allocs,
+                __return_val,
+                __linux_perf_stats,
+            )
+        end
+        $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+    end)
 end
 
 ######################
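
After this hunk, each call to the generated sample function yields a NamedTuple rather than a positional tuple. A hedged sketch of its shape; the field names are taken from the `return (; ...)` expression above, while the concrete values are placeholders invented for illustration.

    # Illustrative only: field names come from the diff, values are made up.
    sample = (;
        __time = 1234.5,               # ns per evaluation, overhead-corrected
        __gctime = 0.0,
        __memory = 0,
        __allocs = 0,
        __return_val = nothing,        # result of the benchmarked expression
        __linux_perf_stats = nothing,  # LinuxPerf.Stats once the perf block has run
    )
    sample.__return_val                # how _run reads the return value (see the first hunk)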

src/trials.jl

Lines changed: 16 additions & 1 deletion
@@ -8,6 +8,7 @@ mutable struct Trial
     gctimes::Vector{Float64}
     memory::Int
     allocs::Int
+    linux_perf_stats::Union{LinuxPerf.Stats, Nothing}
 end
 
 Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int))
@@ -24,11 +25,25 @@ function Base.copy(t::Trial)
     return Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs)
 end
 
-function Base.push!(t::Trial, time, gctime, memory, allocs)
+const TrialContents = NamedTuple{(
+    :__time,
+    :__gctime,
+    :__memory,
+    :__allocs,
+    :__return_val,
+    :__linux_perf_stats,
+)}
+
+function Base.push!(t::Trial, trial_contents::TrialContents)
+    time = trial_contents.__time
+    gctime = trial_contents.__gctime
+    memory = trial_contents.__memory
+    allocs = trial_contents.__allocs
     push!(t.times, time)
    push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
+    trial.linux_perf_stats = trial_contents.__linux_perf_stats
     return t
 end
 
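
The push! method above is keyed on a NamedTuple alias rather than positional arguments, so extra per-sample data such as perf stats can travel alongside time and memory without growing the positional API. A self-contained, distilled illustration of that pattern follows; MiniTrial and MiniContents are invented for the example and are not BenchmarkTools' actual types.

    # Minimal stand-ins for Trial / TrialContents, invented for illustration.
    mutable struct MiniTrial
        times::Vector{Float64}
        linux_perf_stats::Any   # stands in for Union{LinuxPerf.Stats, Nothing}
    end

    # Dispatch on the field names, mirroring the TrialContents alias above.
    const MiniContents = NamedTuple{(:__time, :__linux_perf_stats)}

    function Base.push!(t::MiniTrial, c::MiniContents)
        push!(t.times, c.__time)
        t.linux_perf_stats = c.__linux_perf_stats
        return t
    end

    t = MiniTrial(Float64[], nothing)
    push!(t, (; __time = 100.0, __linux_perf_stats = nothing))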
